From f98955aab98e0f5a2ee30c424fa303b3cbc63c95 Mon Sep 17 00:00:00 2001
From: Joel Klinger
Date: Tue, 9 Jun 2020 17:44:07 +0100
Subject: [PATCH] [266] Refactor and simplify ES configuration (#275)

* make sure conf dir is empty
* simplified es config
* added orm es config reader
* modified setup_es to pick up new es config
* swapped es_mode for boolean
* aliases now consistent with config
* aliases now automatically located
* added endpoint field to estasks
* added endpoint field to sql2estasks
* [267] Pool ES mappings across datasets (#280)
* changed branch name
* mappings build
* updated docs
* updated docs
* updated docs
* added docstrings
* added dynamic strict to settings
* removed index.json in favour of a single defaults file
* using soft alias until a future PR to minimise changes
* cleaned and sorted json
* [267] Tidy & slim schema transformations (#281)
* pruned deprecated schema transformations
* updated fos fieldname on arxlive
* unified data set schema transformations
* restructured directory
* refactored references to schema_transformation
* refactored references to schema_transformation
* slimmed down transformations, and included entity_type
* pruned ontology
* tidied schemas
* consistency tests
* reverted unrelated json file
* harmonised name fieldsofstudy across arxiv
* added novelty back in
* sorted json
* sorted json
* sorted json

Co-authored-by: Joel Klinger
Co-authored-by: Joel Klinger

* patched out es config setup from tests
* removed redundant tests
* fixed json formatting
* none included for testing
* picked up bug in test

Co-authored-by: Joel Klinger
---
 docs/source/nesta.core.schemas.rst            |   1 +
 docs/source/nesta.core.scripts.rst            |   3 -
 .../arxiv/arxiv_elasticsearch/run.py          |  16 +-
 .../crunchbase_elasticsearch/run.py           |  14 +-
 nesta/core/batchables/eurito/arxiv_eu/run.py  |   8 +-
 .../{crunchbase_eu => companies_eu}/run.py    |   5 +-
 nesta/core/batchables/eurito/cordis_eu/run.py |   8 +-
 .../core/batchables/eurito/patstat-eu/run.py  | 128 ---------
 .../core/batchables/eurito/patstat_eu/run.py  |   4 +-
 .../health_data/nih_abstract_mesh_data/run.py |   4 +-
 .../batchables/health_data/nih_dedupe/run.py  |   4 +-
 .../health_data/nih_process_data/run.py       |  14 +-
 .../meetup/topic_tag_elasticsearch/run.py     |   9 +-
 nesta/core/config/elasticsearch.config        | Bin 2633 -> 0 bytes
 nesta/core/config/elasticsearch.yaml          | Bin 0 -> 1086 bytes
 nesta/core/luigihacks/estask.py               |   8 +-
 nesta/core/luigihacks/sql2estask.py           |  12 +-
 nesta/core/orms/arxiv_es_config.json          | 153 ----------
 nesta/core/orms/crunchbase-eu_es_config.json  | 262 ------------------
 nesta/core/orms/orm_utils.py                  | 261 ++++++++++++-----
 nesta/core/orms/tests/test_orm_utils.py       | 151 ++++------
 nesta/core/routines/arxiv/arxiv_es_tokens.py  |   7 +-
 nesta/core/routines/arxiv/arxiv_lolvelty.py   |  11 +-
 nesta/core/routines/arxiv/arxiv_root_task.py  |  13 +-
 .../crunchbase_elasticsearch_task.py          |   8 +-
 .../crunchbase/crunchbase_lolvelty.py         |   3 +-
 .../crunchbase/crunchbase_root_task.py        |   2 +-
 nesta/core/routines/eurito_es/es_root.py      |   7 +-
 .../nih_data/nih_abstracts_mesh_task.py       |   7 +-
 .../health_data/nih_data/nih_dedupe_task.py   |  25 +-
 .../health_data/nih_data/nih_lolvelty.py      |   1 +
 .../health_data/nih_data/nih_process_task.py  |   6 +-
 .../health_tagging/health_meetup_es_task.py   |   4 +-
 .../meetup/health_tagging/meetup_lolvelty.py  |   1 +
 nesta/core/schemas/README.rst                 |   1 +
 nesta/core/schemas/tier_1/datasets/arxiv.json |  30 ++
 .../schemas/tier_1/datasets/companies.json    |  49 ++++
 .../core/schemas/tier_1/datasets/cordis.json  |  16 ++
 .../core/schemas/tier_1/datasets/meetup.json  |  29 ++
 nesta/core/schemas/tier_1/datasets/nih.json   |  35 +++
 .../core/schemas/tier_1/datasets/patstat.json |  18 ++
 nesta/core/schemas/tier_1/mappings/README.rst | 148 ++++++++++
 .../mappings/datasets/arxiv_mapping.json      |  65 +++++
 .../mappings/datasets/companies_mapping.json} |  37 +--
 .../mappings/datasets/cordis_mapping.json     |  68 +++++
 .../mappings/datasets/meetup_mapping.json}    |  27 +-
 .../mappings/datasets/nih_mapping.json}       |  25 +-
 .../mappings/datasets/patstat_mapping.json}   |  24 +-
 .../tier_1/mappings/defaults/defaults.json    |  24 ++
 .../endpoints/arxlive/arxiv_mapping.json      |  76 +++++
 .../endpoints/eurito-dev/arxiv_mapping.json}  |  79 +-----
 .../eurito-dev/companies_mapping.json         |  14 +
 .../endpoints/eurito-dev/cordis_mapping.json} |  23 +-
 .../endpoints/eurito-dev/patstat_mapping.json |  12 +
 .../endpoints/eurito/arxiv_mapping.json       |  97 +++++++
 .../endpoints/eurito/companies_mapping.json   |  14 +
 .../endpoints/eurito/patstat_mapping.json     |  12 +
 .../endpoints/health-scanner/aliases.json}    |  51 ++--
 .../endpoints/health-scanner/config.yaml      |   3 +
 .../endpoints/health-scanner/nulls.json}      |   0
 .../tier_1/{tier_1.json => ontology.json}     |  49 +---
 .../tier_1/schema_transformations/arxiv.json  |  67 -----
 .../crunchbase_organisation.json              |  11 -
 .../crunchbase_organisation_members.json      | 158 -----------
 .../eurito/arxiv-eu.json                      |  87 ------
 .../eurito/cordis-eu.json                     |  47 ----
 .../eurito/crunchbase-eu.json                 | 162 -----------
 .../eurito/patstat-eu.json                    |  55 ----
 .../tier_1/schema_transformations/github.json |  72 -----
 .../tier_1/schema_transformations/meetup.json |  79 ------
 .../meetup_members.json                       |  12 -
 .../tier_1/schema_transformations/nih.json    | 103 -------
 .../schema_transformations/worldbank.json     |  83 ------
 .../core/schemas/tier_1/tests/test_aliases.py |  35 +++
 .../core/schemas/tier_1/tests/test_format.py  |  44 +++
 .../schemas/tier_1/tests/test_ontology.py     |  48 ++++
 .../schemas/tier_1/tests/test_validate.py     |  87 ------
 .../schemas/tier_1/{ => tests}/tidy_schema.py |   2 +-
 nesta/packages/biorxiv/collect_biorxiv.py     |  35 ---
 .../packages/biorxiv/test_collect_biorxiv.py  |  25 --
 nesta/packages/decorators/schema_transform.py |  30 +-
 .../decorators/tests/test_schema_transform.py |   9 +-
 nesta/packages/geo_utils/country_iso_code.py  |  15 +-
 requirements.txt                              |   1 +
 84 files changed, 1238 insertions(+), 2215 deletions(-)
 rename nesta/core/batchables/eurito/{crunchbase_eu => companies_eu}/run.py (97%)
 delete mode 100644 nesta/core/batchables/eurito/patstat-eu/run.py
 delete mode 100644 nesta/core/config/elasticsearch.config
 create mode 100644 nesta/core/config/elasticsearch.yaml
 delete mode 100644 nesta/core/orms/arxiv_es_config.json
 delete mode 100644 nesta/core/orms/crunchbase-eu_es_config.json
 create mode 100644 nesta/core/schemas/tier_1/datasets/arxiv.json
 create mode 100644 nesta/core/schemas/tier_1/datasets/companies.json
 create mode 100644 nesta/core/schemas/tier_1/datasets/cordis.json
 create mode 100644 nesta/core/schemas/tier_1/datasets/meetup.json
 create mode 100644 nesta/core/schemas/tier_1/datasets/nih.json
 create mode 100644 nesta/core/schemas/tier_1/datasets/patstat.json
 create mode 100644 nesta/core/schemas/tier_1/mappings/README.rst
 create mode 100644 nesta/core/schemas/tier_1/mappings/datasets/arxiv_mapping.json
 rename nesta/core/{orms/crunchbase_es_config.json => schemas/tier_1/mappings/datasets/companies_mapping.json} (89%)
 create mode 100644 nesta/core/schemas/tier_1/mappings/datasets/cordis_mapping.json
 rename nesta/core/{orms/meetup_es_config.json => schemas/tier_1/mappings/datasets/meetup_mapping.json} (87%)
 rename nesta/core/{orms/nih_es_config.json => schemas/tier_1/mappings/datasets/nih_mapping.json} (90%)
 rename nesta/core/{orms/patstat-eu_es_config.json => schemas/tier_1/mappings/datasets/patstat_mapping.json} (85%)
 create mode 100644 nesta/core/schemas/tier_1/mappings/defaults/defaults.json
 create mode 100644 nesta/core/schemas/tier_1/mappings/endpoints/arxlive/arxiv_mapping.json
 rename nesta/core/{orms/arxiv-eu_es_config.json => schemas/tier_1/mappings/endpoints/eurito-dev/arxiv_mapping.json} (56%)
 create mode 100644 nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/companies_mapping.json
 rename nesta/core/{orms/cordis-eu_es_config.json => schemas/tier_1/mappings/endpoints/eurito-dev/cordis_mapping.json} (81%)
 create mode 100644 nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/patstat_mapping.json
 create mode 100644 nesta/core/schemas/tier_1/mappings/endpoints/eurito/arxiv_mapping.json
 create mode 100644 nesta/core/schemas/tier_1/mappings/endpoints/eurito/companies_mapping.json
 create mode 100644 nesta/core/schemas/tier_1/mappings/endpoints/eurito/patstat_mapping.json
 rename nesta/core/schemas/tier_1/{aliases/health_scanner.json => mappings/endpoints/health-scanner/aliases.json} (63%)
 create mode 100644 nesta/core/schemas/tier_1/mappings/endpoints/health-scanner/config.yaml
 rename nesta/core/schemas/tier_1/{field_null_mappings/health_scanner.json => mappings/endpoints/health-scanner/nulls.json} (100%)
 rename nesta/core/schemas/tier_1/{tier_1.json => ontology.json} (68%)
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/arxiv.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/crunchbase_organisation.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/crunchbase_organisation_members.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/eurito/arxiv-eu.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/eurito/cordis-eu.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/eurito/crunchbase-eu.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/eurito/patstat-eu.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/github.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/meetup.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/meetup_members.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/nih.json
 delete mode 100644 nesta/core/schemas/tier_1/schema_transformations/worldbank.json
 create mode 100644 nesta/core/schemas/tier_1/tests/test_aliases.py
 create mode 100644 nesta/core/schemas/tier_1/tests/test_format.py
 create mode 100644 nesta/core/schemas/tier_1/tests/test_ontology.py
 delete mode 100644 nesta/core/schemas/tier_1/tests/test_validate.py
 rename nesta/core/schemas/tier_1/{ => tests}/tidy_schema.py (91%)
 delete mode 100644 nesta/packages/biorxiv/collect_biorxiv.py
 delete mode 100644 nesta/packages/biorxiv/test_collect_biorxiv.py
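The new nesta/core/config/elasticsearch.yaml is committed encrypted (hence the binary patch further below), so its exact contents are not visible in this diff. The following is a minimal, hypothetical sketch of the shape that the new parse_es_config() in orm_utils.py appears to expect; the endpoint id and version numbers are invented for illustration:

    import textwrap
    import yaml

    # Assumed config shape, inferred from parse_es_config(): per-endpoint
    # settings override the defaults, and 'indexes' maps dataset -> version.
    RAW = textwrap.dedent("""\
        defaults:
          scheme: https
          port: 443
          region: eu-west-2
        endpoints:
          arxlive:
            id: abc123        # fabricated AWS ES domain id
            indexes:
              arxiv: 4        # production index 'arxiv_v4'; dev index 'arxiv_dev'
        """)

    raw_config = yaml.safe_load(RAW)
    endpoint, endpoint_config = next(iter(raw_config['endpoints'].items()))
    indexes = endpoint_config.pop('indexes')
    base_config = {**raw_config['defaults'], **endpoint_config}
    scheme, _id = base_config.pop('scheme'), base_config.pop('id')
    host = f"{scheme}://search-{endpoint}-{_id}.{base_config['region']}.es.amazonaws.com"
    print(host)     # https://search-arxlive-abc123.eu-west-2.es.amazonaws.com
    print(indexes)  # {'arxiv': 4}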
diff --git a/docs/source/nesta.core.schemas.rst b/docs/source/nesta.core.schemas.rst
index d6cda5a6..ae43472c 100644
--- a/docs/source/nesta.core.schemas.rst
+++ b/docs/source/nesta.core.schemas.rst
@@ -1 +1,2 @@
 .. include:: ../../nesta/core/schemas/README.rst
+.. include:: ../../nesta/core/schemas/tier_1/mappings/README.rst
diff --git a/docs/source/nesta.core.scripts.rst b/docs/source/nesta.core.scripts.rst
index 3522e96d..cb91def7 100644
--- a/docs/source/nesta.core.scripts.rst
+++ b/docs/source/nesta.core.scripts.rst
@@ -1,4 +1 @@
-Scripts
-=======
-
 .. include:: ../../nesta/core/scripts/README.rst
diff --git a/nesta/core/batchables/arxiv/arxiv_elasticsearch/run.py b/nesta/core/batchables/arxiv/arxiv_elasticsearch/run.py
index 19106279..40bb4010 100644
--- a/nesta/core/batchables/arxiv/arxiv_elasticsearch/run.py
+++ b/nesta/core/batchables/arxiv/arxiv_elasticsearch/run.py
@@ -20,7 +20,6 @@
 from datetime import datetime as dt
 
 from nesta.core.orms.orm_utils import db_session, get_mysql_engine
-from nesta.core.orms.orm_utils import load_json_from_pathstub
 from nesta.core.orms.orm_utils import object_to_dict
 from nesta.core.orms.arxiv_orm import Article
 from nesta.core.orms.grid_orm import Institute
@@ -76,10 +75,7 @@ def run():
     ngrammer = Ngrammer(database="production")
 
     # es setup
-    strans_kwargs={'filename':'arxiv.json',
-                   'from_key':'tier_0',
-                   'to_key':'tier_1',
-                   'ignore':['id']}
+    strans_kwargs={'filename':'arxiv.json', 'ignore':['id']}
     es = ElasticsearchPlus(hosts=es_host,
                            port=es_port,
                            aws_auth_region=aws_auth_region,
@@ -164,9 +160,9 @@ def run():
         countries = set(grid_countries[inst_id]
                         for inst_id in good_institutes
                         if inst_id in grid_countries)
-        row['categories'], _, _ = hierarchy_field(cats)
-        row['fos'], _, _ = hierarchy_field(fos)
-        row['countries'], _, _ = hierarchy_field(countries)
+        row['nested_categories'], _, _ = hierarchy_field(cats)
+        row['fields_of_study'], _, _ = hierarchy_field(fos)
+        row['nested_location'], _, _ = hierarchy_field(countries)
 
         # Pull out international institute info
         has_mn = any(is_multinational(inst,
@@ -216,8 +212,8 @@ def run():
 
     if 'BATCHPAR_outinfo' not in os.environ:
         from nesta.core.orms.orm_utils import setup_es
-        es, es_config = setup_es('dev', True, True,
-                                 dataset='arxiv')
+        es, es_config = setup_es(endpoint='arxlive', dataset='arxiv',
+                                 production=False, drop_and_recreate=True)
         environ = {'batch_file': ('ArxivESTask-2019-09-19-'
                                   'False-1568888970724721.json'),
                    'config': ('/home/ec2-user/nesta-eu/nesta/'
diff --git a/nesta/core/batchables/crunchbase/crunchbase_elasticsearch/run.py b/nesta/core/batchables/crunchbase/crunchbase_elasticsearch/run.py
index 6a1a5646..c25d6fc9 100644
--- a/nesta/core/batchables/crunchbase/crunchbase_elasticsearch/run.py
+++ b/nesta/core/batchables/crunchbase/crunchbase_elasticsearch/run.py
@@ -58,12 +58,8 @@ def run():
     continent_lookup[None] = None
 
     # es setup
-    field_null_mapping = load_json_from_pathstub("tier_1/field_null_mappings/",
-                                                 "health_scanner.json")
-    strans_kwargs={'filename':'crunchbase_organisation_members.json',
-                   'from_key':'tier_0',
-                   'to_key':'tier_1',
-                   'ignore':['id']}
+    field_null_mapping = load_json_from_pathstub("health-scanner", "nulls.json")
+    strans_kwargs = {'filename': 'companies.json', 'ignore': ['id']}
     es = ElasticsearchPlus(hosts=es_host,
                            port=es_port,
                            aws_auth_region=aws_auth_region,
@@ -162,9 +158,9 @@ def run():
 
     if 'BATCHPAR_outinfo' not in os.environ:
         from nesta.core.orms.orm_utils import setup_es
-        es, es_config = setup_es('dev', True, True,
-                                 dataset='crunchbase',
-                                 aliases='health_scanner')
+        es, es_config = setup_es(production=False, endpoint='health-scanner',
+                                 dataset='companies',
+                                 drop_and_recreate=True)
         environ = {"AWSBATCHTEST": "",
                    'BATCHPAR_batch_file': 'crunchbase_to_es-15597291977144725.json',
diff --git a/nesta/core/batchables/eurito/arxiv_eu/run.py b/nesta/core/batchables/eurito/arxiv_eu/run.py
index 19be1961..643c3595 100644
--- a/nesta/core/batchables/eurito/arxiv_eu/run.py
+++ b/nesta/core/batchables/eurito/arxiv_eu/run.py
@@ -54,9 +54,7 @@ def run():
 
     # es setup
     logging.info('Connecting to ES')
-    strans_kwargs={'filename':'eurito/arxiv-eu.json',
-                   'from_key':'tier_0', 'to_key':'tier_1',
-                   'ignore':['id']}
+    strans_kwargs = {'filename': 'arxiv.json', 'ignore': ['id']}
     es = ElasticsearchPlus(hosts=es_host,
                            port=es_port,
                            aws_auth_region=aws_auth_region,
@@ -202,8 +200,8 @@ def run():
     set_log_level()
     if 'BATCHPAR_outinfo' not in os.environ:
         from nesta.core.orms.orm_utils import setup_es
-        es, es_config = setup_es('dev', True, True,
-                                 dataset='arxiv-eu')
+        es, es_config = setup_es(production=False, endpoint='eurito',
+                                 dataset='arxiv', drop_and_recreate=True)
         environ = {'config': ('/home/ec2-user/nesta-eu/nesta/'
                               'core/config/mysqldb.config'),
                    'batch_file' : ('arxiv-eu_EURITO-ElasticsearchTask-'
diff --git a/nesta/core/batchables/eurito/crunchbase_eu/run.py b/nesta/core/batchables/eurito/companies_eu/run.py
similarity index 97%
rename from nesta/core/batchables/eurito/crunchbase_eu/run.py
rename to nesta/core/batchables/eurito/companies_eu/run.py
index 652f3752..5c63436c 100644
--- a/nesta/core/batchables/eurito/crunchbase_eu/run.py
+++ b/nesta/core/batchables/eurito/companies_eu/run.py
@@ -61,10 +61,7 @@ def run():
     eu_countries = get_eu_countries()
 
     # es setup
-    strans_kwargs={'filename':'eurito/crunchbase-eu.json',
-                   'from_key':'tier_0',
-                   'to_key':'tier_1',
-                   'ignore':['id']}
+    strans_kwargs = {'filename': 'companies.json', 'ignore': ['id']}
     es = ElasticsearchPlus(hosts=es_host,
                            port=es_port,
                            aws_auth_region=aws_auth_region,
diff --git a/nesta/core/batchables/eurito/cordis_eu/run.py b/nesta/core/batchables/eurito/cordis_eu/run.py
index 01b2b948..d64fa1d2 100644
--- a/nesta/core/batchables/eurito/cordis_eu/run.py
+++ b/nesta/core/batchables/eurito/cordis_eu/run.py
@@ -88,9 +88,7 @@ def run():
 
     # es setup
     logging.info('Connecting to ES')
-    strans_kwargs={'filename':'eurito/cordis-eu.json',
-                   'from_key':'tier_0', 'to_key':'tier_1',
-                   'ignore':['id']}
+    strans_kwargs = {'filename': 'cordis.json', 'ignore': ['id']}
     es = ElasticsearchPlus(hosts=es_host,
                            port=es_port,
                            aws_auth_region=aws_auth_region,
@@ -132,8 +130,8 @@ def run():
     if 'BATCHPAR_outinfo' not in os.environ:
         from nesta.core.orms.orm_utils import setup_es
         from nesta.core.luigihacks.misctools import find_filepath_from_pathstub
-        es, es_config = setup_es('dev', True, True,
-                                 dataset='cordis-eu')
+        es, es_config = setup_es(production=False, endpoint='eurito',
+                                 dataset='cordis', drop_and_recreate=True)
         environ = {'config': find_filepath_from_pathstub('mysqldb.config'),
                    'batch_file' : ('cordis-eu_EURITO-ElasticsearchTask-'
                                    '2020-04-10-True-15865345336407135.json'),
diff --git a/nesta/core/batchables/eurito/patstat-eu/run.py b/nesta/core/batchables/eurito/patstat-eu/run.py
deleted file mode 100644
index f45d7953..00000000
--- a/nesta/core/batchables/eurito/patstat-eu/run.py
+++ /dev/null
@@ -1,128 +0,0 @@
-from ast import literal_eval
-import boto3
-import json
-import logging
-import os
-
-from nesta.core.luigihacks.elasticsearchplus import ElasticsearchPlus
-from nesta.core.luigihacks.luigi_logging import set_log_level
-from nesta.core.orms.orm_utils import db_session, get_mysql_engine
-from nesta.core.orms.orm_utils import load_json_from_pathstub
-from nesta.core.orms.orm_utils import object_to_dict
-from nesta.core.orms.patstat_eu_orm import ApplnFamily
-from nesta.core.orms.patstat_2019_05_13 import *
-from nesta.packages.geo_utils.lookup import get_eu_countries
-
-
-def select_text(objs, lang_field, text_field):
-    if len(objs) == 0:
-        return None
-    _objs = [t for t in objs if t[lang_field] == 'en']
-    if len(_objs) == 0:
-        _objs = objs
-    obj = sorted(_objs, key=lambda x: len(x), reverse=True)[0]
-    return obj[text_field]
-
-
-def metadata(orm, session, appln_ids, field_selector=None):
-    if field_selector is None:
-        field_selector = orm.appln_id
-    _filter = field_selector.in_(appln_ids)
-    return [object_to_dict(_obj) for _obj in
-            session.query(orm).filter(_filter).all()]
-
-
-def run():
-    test = literal_eval(os.environ["BATCHPAR_test"])
-    bucket = os.environ['BATCHPAR_bucket']
-    batch_file = os.environ['BATCHPAR_batch_file']
-
-    db_name = os.environ["BATCHPAR_db_name"]
-    es_host = os.environ['BATCHPAR_outinfo']
-    es_port = int(os.environ['BATCHPAR_out_port'])
-    es_index = os.environ['BATCHPAR_out_index']
-    es_type = os.environ['BATCHPAR_out_type']
-    entity_type = os.environ["BATCHPAR_entity_type"]
-    aws_auth_region = os.environ["BATCHPAR_aws_auth_region"]
-
-    # database setup
-    logging.info('Retrieving engine connection')
-    engine = get_mysql_engine("BATCHPAR_config", "mysqldb",
-                              db_name)
-    _engine = get_mysql_engine("BATCHPAR_config", "readonly",
-                               "patstat_2019_05_13")
-
-    # es setup
-    logging.info('Connecting to ES')
-    strans_kwargs={'filename':'eurito/patstat-eu.json',
-                   'from_key':'tier_0', 'to_key':'tier_1',
-                   'ignore':['id']}
-    es = ElasticsearchPlus(hosts=es_host,
-                           port=es_port,
-                           aws_auth_region=aws_auth_region,
-                           no_commit=("AWSBATCHTEST" in
-                                      os.environ),
-                           entity_type=entity_type,
-                           strans_kwargs=strans_kwargs,
-                           auto_translate=True,
-                           auto_translate_kwargs={'min_len':20},
-                           null_empty_str=True,
-                           coordinates_as_floats=True,
-                           do_sort=True,
-                           ngram_fields=['textBody_abstract_patent'])
-
-    # collect file
-    logging.info('Retrieving patent family ids')
-    nrows = 20 if test else None
-    s3 = boto3.resource('s3')
-    obj = s3.Object(bucket, batch_file)
-    docdb_fam_ids = json.loads(obj.get()['Body']._raw_stream.read())
-    logging.info(f"{len(docdb_fam_ids)} patent family IDs "
-                 "retrieved from s3")
-
-    eu_countries = get_eu_countries()
-
-    logging.info('Processing rows')
-    _filter = ApplnFamily.docdb_family_id.in_(docdb_fam_ids)
-    with db_session(engine) as session:
-        for obj in session.query(ApplnFamily).filter(_filter).all():
-            row = object_to_dict(obj)
-            appln_ids = row.pop('appln_id')
-            with db_session(_engine) as _session:
-                _titles = metadata(Tls202ApplnTitle, _session, appln_ids)
-                _abstrs = metadata(Tls203ApplnAbstr, _session, appln_ids)
-                ipcs = metadata(Tls209ApplnIpc, _session, appln_ids)
-                nace2s = metadata(Tls229ApplnNace2, _session, appln_ids)
-                techs = metadata(Tls230ApplnTechnField, _session, appln_ids)
-                # Get persons
-                _pers_applns = metadata(Tls207PersAppln, _session, appln_ids)
-                pers_ids = set(pa['person_id'] for pa in _pers_applns)
-                persons = metadata(Tls906Person, _session, pers_ids,
-                                   field_selector=Tls906Person.person_id)
-
-            title = select_text(_titles, 'appln_title_lg', 'appln_title')
-            abstr = select_text(_abstrs, 'appln_abstract_lg', 'appln_abstract')
-
-            # Get names from lookups
-            ipcs = list(set(i['ipc_class_symbol'].split()[0] for i in ipcs))
-            nace2s = list(set(n['nace2_code'] for n in nace2s))
-            techs = list(set(t['techn_field_nr'] for t in techs))
-            ctrys = list(set(p['person_ctry_code'] for p in persons))
-            nuts = list(set(p['nuts'] for p in persons))
-            is_eu = any(c in eu_countries for c in ctrys)
-
-            # Index the data
-            row = dict(title=title, abstract=abstr, ipc=ipcs, nace2=nace2s,
-                       tech=techs, ctry=ctrys, nuts=nuts, is_eu=is_eu, **row)
-            uid = row.pop('docdb_family_id')
-            _row = es.index(index=es_index, doc_type=es_type,
-                            id=uid, body=row)
-
-
-    logging.warning("Batch job complete.")
-
-
-if __name__ == "__main__":
-    set_log_level()
-    logging.info('Starting...')
-    run()
diff --git a/nesta/core/batchables/eurito/patstat_eu/run.py b/nesta/core/batchables/eurito/patstat_eu/run.py
index 8871850f..3638501f 100644
--- a/nesta/core/batchables/eurito/patstat_eu/run.py
+++ b/nesta/core/batchables/eurito/patstat_eu/run.py
@@ -63,9 +63,7 @@ def run():
 
     # es setup
     logging.info('Connecting to ES')
-    strans_kwargs={'filename':'eurito/patstat-eu.json',
-                   'from_key':'tier_0', 'to_key':'tier_1',
-                   'ignore':['id']}
+    strans_kwargs = {'filename': 'patstat.json', 'ignore': ['id']}
     es = ElasticsearchPlus(hosts=es_host,
                            port=es_port,
                            aws_auth_region=aws_auth_region,
diff --git a/nesta/core/batchables/health_data/nih_abstract_mesh_data/run.py b/nesta/core/batchables/health_data/nih_abstract_mesh_data/run.py
index 93be1e88..4c3decea 100644
--- a/nesta/core/batchables/health_data/nih_abstract_mesh_data/run.py
+++ b/nesta/core/batchables/health_data/nih_abstract_mesh_data/run.py
@@ -68,9 +68,7 @@ def run():
     dupes = format_duplicate_map(dupes)
 
     # Set up elastic search connection
-    field_null_mapping = load_json_from_pathstub("tier_1/"
-                                                 "field_null_mappings/",
-                                                 "health_scanner.json")
+    field_null_mapping = load_json_from_pathstub("health-scanner", "nulls.json")
     es = ElasticsearchPlus(hosts=es_config['host'],
                            port=es_config['port'],
                            aws_auth_region=es_config['region'],
diff --git a/nesta/core/batchables/health_data/nih_dedupe/run.py b/nesta/core/batchables/health_data/nih_dedupe/run.py
index 3bd14f61..b9c4c3eb 100644
--- a/nesta/core/batchables/health_data/nih_dedupe/run.py
+++ b/nesta/core/batchables/health_data/nih_dedupe/run.py
@@ -61,9 +61,7 @@ def run():
     art_ids = json.loads(ids_obj.get()['Body']._raw_stream.read())
     logging.info(f'Processing {len(art_ids)} article ids')
 
-    field_null_mapping = load_json_from_pathstub(("tier_1/"
-                                                  "field_null_mappings/"),
-                                                 "health_scanner.json")
+    field_null_mapping = load_json_from_pathstub("health-scanner", "nulls.json")
     es = ElasticsearchPlus(hosts=es_host,
                            port=es_port,
                            aws_auth_region=aws_auth_region,
diff --git a/nesta/core/batchables/health_data/nih_process_data/run.py b/nesta/core/batchables/health_data/nih_process_data/run.py
index e8f0a905..dc1d9c78 100644
--- a/nesta/core/batchables/health_data/nih_process_data/run.py
+++ b/nesta/core/batchables/health_data/nih_process_data/run.py
@@ -25,7 +25,6 @@ def run():
     start_index = os.environ["BATCHPAR_start_index"]
     end_index = os.environ["BATCHPAR_end_index"]
 
-    #mysqldb_config = os.environ["BATCHPAR_config"]
     es_host = os.environ["BATCHPAR_outinfo"]
     es_port = os.environ["BATCHPAR_out_port"]
     es_index = os.environ["BATCHPAR_out_index"]
@@ -87,13 +86,8 @@ def run():
     df['total_cost_currency'] = 'USD'
 
     # output to elasticsearch
-    field_null_mapping = load_json_from_pathstub("tier_1/field_null_mappings/",
-                                                 "health_scanner.json")
-    strans_kwargs={'filename':'nih.json',
-                   'from_key':'tier_0',
-                   'to_key':'tier_1',
-                   'ignore':['application_id']}
-
+    field_null_mapping = load_json_from_pathstub("health-scanner", "nulls.json")
+    strans_kwargs = {'filename': 'nih.json', 'ignore': ['application_id']}
     es = ElasticsearchPlus(hosts=es_host,
                            port=es_port,
                            aws_auth_region=aws_auth_region,
@@ -143,15 +137,15 @@ def run():
     pars = {'start_index': '2001360',
             'end_index': '2003940',
             'db': 'dev',
+            'done': 'False',
             'config': (f'{os.environ["HOME"]}/nesta/nesta/'
                        'core/config/mysqldb.config'),
-            'done': 'False',
             'outinfo': ('https://search-health-scanner-'
                         '5cs7g52446h7qscocqmiky5dn4.'
                         'eu-west-2.es.amazonaws.com'),
             'out_index': 'nih_dev',
             'out_type': '_doc',
-            'out_port': '_doc',
+            'out_port': '443',
             'aws_auth_region': 'eu-west-2',
             'entity_type': 'paper',
             'test': 'False'}
diff --git a/nesta/core/batchables/meetup/topic_tag_elasticsearch/run.py b/nesta/core/batchables/meetup/topic_tag_elasticsearch/run.py
index a6a80ccc..e60eda35 100644
--- a/nesta/core/batchables/meetup/topic_tag_elasticsearch/run.py
+++ b/nesta/core/batchables/meetup/topic_tag_elasticsearch/run.py
@@ -71,13 +71,8 @@ def run():
     mesh_terms = format_mesh_terms(df_mesh)
 
     # Setup ES+
-    field_null_mapping = load_json_from_pathstub(("tier_1/"
-                                                  "field_null_mappings/"),
-                                                 "health_scanner.json")
-    strans_kwargs={'filename':'meetup.json',
-                   'from_key':'tier_0',
-                   'to_key':'tier_1',
-                   'ignore':[]}
+    field_null_mapping = load_json_from_pathstub("health-scanner", "nulls.json")
+    strans_kwargs = {'filename': 'meetup.json'}
     es = ElasticsearchPlus(hosts=es_host,
                            port=es_port,
                            aws_auth_region=aws_auth_region,
diff --git a/nesta/core/config/elasticsearch.config b/nesta/core/config/elasticsearch.config
deleted file mode 100644
index d1ae5c3c48022f0d9b8c3f549f3e7027c7ecce2b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 2633
[base85-encoded payload omitted: the file is stored encrypted, so the binary patch body is unreadable]

diff --git a/nesta/core/config/elasticsearch.yaml b/nesta/core/config/elasticsearch.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..deae86aed21c60d04431b501b899e15b99bfac53
GIT binary patch
literal 1086
[base85-encoded payload omitted: the file is stored encrypted, so the binary patch body is unreadable]

literal 0
HcmV?d00001

diff --git a/nesta/core/luigihacks/estask.py b/nesta/core/luigihacks/estask.py
index d6c40a38..a316f3bd 100644
--- a/nesta/core/luigihacks/estask.py
+++ b/nesta/core/luigihacks/estask.py
@@ -17,6 +17,7 @@ class ElasticsearchTask(AutoBatchTask):
     Args:
         routine_id (str): Label for this routine.
        db_config_path (str): Database config path.
+        endpoint (str): AWS domain name of the ES endpoint.
         dataset (str): Name of the ES dataset.
         entity_type (str): Entity type, for :obj:`ElasticsearchPlus`.
         kwargs (dict): Any extra parameters to pass to the batchables.
@@ -27,6 +28,7 @@ class ElasticsearchTask(AutoBatchTask):
     '''
     routine_id = luigi.Parameter()
     db_config_path = luigi.Parameter('mysqldb.config')
+    endpoint = luigi.Parameter()
     dataset = luigi.Parameter()
     entity_type = luigi.Parameter()
     kwargs = luigi.DictParameter(default={})
@@ -72,10 +74,10 @@ def prepare(self):
                             " while in test mode")
 
         # Setup elasticsearch and extract all ids
-        es_mode = 'dev' if self.test else 'prod'
-        es, es_config = setup_es(es_mode, self.test,
-                                 drop_and_recreate=False,
+        es, es_config = setup_es(endpoint=self.endpoint,
                                  dataset=self.dataset,
+                                 production=not self.test,
+                                 drop_and_recreate=False,
                                  increment_version=False)
         ids = get_es_ids(es, es_config, size=10000)  # All ids in this index
         ids = ids - self._done_ids  # Don't repeat done ids
diff --git a/nesta/core/luigihacks/sql2estask.py b/nesta/core/luigihacks/sql2estask.py
index a2f37f29..82ff11c6 100644
--- a/nesta/core/luigihacks/sql2estask.py
+++ b/nesta/core/luigihacks/sql2estask.py
@@ -31,6 +31,7 @@ class Sql2EsTask(autobatch.AutoBatchTask):
         process_batch_size (int): Number of rows to process in a batch.
         drop_and_recreate (bool): If in test mode, drop and recreate the ES index?
         dataset (str): Name of the elasticsearch dataset.
+        endpoint (str): Name of the AWS ES domain endpoint.
         id_field (SqlAlchemy selectable attribute): The ID field attribute.
         filter (SqlAlchemy conditional statement): A conditional statement,
             to be passed to query.filter(). This allows for
@@ -45,8 +46,8 @@ class Sql2EsTask(autobatch.AutoBatchTask):
     db_section = luigi.Parameter(default="mysqldb")
     process_batch_size = luigi.IntParameter(default=10000)
     drop_and_recreate = luigi.BoolParameter(default=False)
-    aliases = luigi.Parameter(default=None)
     dataset = luigi.Parameter()
+    endpoint = luigi.Parameter()
     id_field = luigi.Parameter()
     filter = luigi.Parameter(default=None)
     entity_type = luigi.Parameter()
@@ -75,11 +76,10 @@ def prepare(self):
                                        database)
 
         # Elasticsearch setup
-        es_mode = 'dev' if self.test else 'prod'
-        es, es_config = setup_es(es_mode, self.test,
-                                 self.drop_and_recreate,
+        es, es_config = setup_es(endpoint=self.endpoint,
                                  dataset=self.dataset,
-                                 aliases=self.aliases)
+                                 production=not self.test,
+                                 drop_and_recreate=self.drop_and_recreate)
 
         # Get set of existing ids from elasticsearch via scroll
         existing_ids = get_es_ids(es, es_config)
@@ -122,7 +122,7 @@ def prepare(self):
                       'routine_id': self.routine_id
                       }
             params.update(self.kwargs)
-
+            logging.info(params)
             job_params.append(params)
 
             if self.test and count > 1:
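Taken together, the estask.py and sql2estask.py changes replace the positional setup_es('dev'/'prod', test_mode, ...) call style with keyword arguments, and drop the aliases parameter (aliases are now located automatically from the endpoint's mappings directory). A minimal before/after sketch of a call site, using values taken from this patch (actually running it requires the nesta package, its config files and AWS credentials):

    from nesta.core.orms.orm_utils import setup_es

    # Before this patch: mode string, test flag and aliases passed explicitly
    #   es, es_config = setup_es('dev', True, True,
    #                            dataset='crunchbase',
    #                            aliases='health_scanner')

    # After this patch: endpoint + dataset + production flag
    es, es_config = setup_es(endpoint='health-scanner',
                             dataset='companies',
                             production=False,
                             drop_and_recreate=True)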
"number_of_shards": "5" - } - } -} diff --git a/nesta/core/orms/crunchbase-eu_es_config.json b/nesta/core/orms/crunchbase-eu_es_config.json deleted file mode 100644 index f42aef6a..00000000 --- a/nesta/core/orms/crunchbase-eu_es_config.json +++ /dev/null @@ -1,262 +0,0 @@ -{ - "mappings": { - "_doc": { - "dynamic": "strict", - "properties": { - "_cost_usd2018_organisation": { - "type": "float" - }, - "_terms_sdg_summary": { - "type": "keyword" - }, - "address_of_organisation": { - "type": "keyword" - }, - "booleanFlag_eu_organisation": { - "type": "boolean" - }, - "booleanFlag_health_organisation": { - "type": "boolean" - }, - "coordinate_of_city": { - "type": "geo_point" - }, - "cost_of_funding": { - "type": "long" - }, - "count_employee_organisation": { - "type": "keyword" - }, - "count_rounds_funding": { - "type": "integer" - }, - "currency_of_funding": { - "type": "keyword" - }, - "date_birth_organisation": { - "format": "yyyy-MM-dd", - "type": "date" - }, - "date_death_organisation": { - "format": "yyyy-MM-dd", - "type": "date" - }, - "date_last_funding": { - "format": "yyyy-MM-dd", - "type": "date" - }, - "date_updated_organisation": { - "format": "yyyy-MM-dd", - "type": "date" - }, - "id_continent_organisation": { - "type": "keyword" - }, - "id_iso2_country": { - "type": "keyword" - }, - "id_iso3_country": { - "type": "keyword" - }, - "id_isoNumeric_country": { - "type": "integer" - }, - "id_of_continent": { - "type": "keyword" - }, - "id_parent_organisation": { - "type": "keyword" - }, - "id_state_organisation": { - "type": "keyword" - }, - "metric_novelty_organisation": { - "type": "float" - }, - "name_of_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "placeName_city_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "placeName_continent_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "placeName_country_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "placeName_region_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "placeName_state_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "rank_rhodonite_organisation": { - "type": "float" - }, - "status_of_organisation": { - "type": "keyword" - }, - "terms_alias_organisation": { - "type": "keyword" - }, - "terms_category_organisation": { - "type": "keyword" - }, - "terms_mesh_description": { - "analyzer": "mesh_terms_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "terms_of_countryTags": { - "type": "keyword" - }, - "terms_of_funders": { - "analyzer": "mesh_terms_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "terms_roles_organisation": { - "analyzer": "mesh_terms_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "terms_subcategory_organisation": { - "analyzer": "mesh_terms_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "terms_tokens_entity": { - "type": "keyword" - }, - "textBody_descriptive_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "textBody_summary_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "type_of_entity": { - "type": "keyword" - }, - 
"type_of_organisation": { - "type": "keyword" - }, - "url_crunchBase_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "url_facebook_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "url_linkedIn_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "url_of_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "url_twitter_organisation": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - } - } - } - }, - "settings": { - "analysis": { - "analyzer": { - "mesh_terms_analyzer": { - "filter": [ - "standard", - "lowercase", - "stop" - ], - "tokenizer": "standard", - "type": "custom" - } - } - }, - "index": { - "number_of_replicas": "1", - "number_of_shards": "5" - } - } -} diff --git a/nesta/core/orms/orm_utils.py b/nesta/core/orms/orm_utils.py index 5b732d3f..555ce841 100644 --- a/nesta/core/orms/orm_utils.py +++ b/nesta/core/orms/orm_utils.py @@ -18,8 +18,11 @@ import pymysql import os import json +import yaml import logging import time +from collections import defaultdict +from collections.abc import Mapping def _get_key_value(obj, key): @@ -93,68 +96,86 @@ def assert_correct_config(test, config, key): raise ValueError(f"In test mode the index '{key}' " "must end with '_dev'") +def default_to_regular(d): + """Convert nested defaultdicts to nested dicts. + This is useful when you want to throw KeyErrors, which + would be dynamically accepted otherwise. + + Args: + d (nested defaultdict): A nested defaultdict object. + Returns: + _d (nested dict): A nested dict object. + """ + if isinstance(d, defaultdict): + d = {k: default_to_regular(v) for k, v in d.items()} + return d -def setup_es(es_mode, test_mode, drop_and_recreate, - dataset, aliases=None, increment_version=False): + +def parse_es_config(increment_version): + """Retrieve the ES config for all endpoints and indexes, + including auto-version-incrementing if required. + + Args: + increment_version (bool): Move one version up? (NB: no changes to config file on disk) + Returns: + config: Elasticsearch config dict, for all endpoints and indexes. + """ + raw_config = load_yaml_from_pathstub('config', 'elasticsearch.yaml') + config = defaultdict(lambda: defaultdict(dict)) + for endpoint, endpoint_config in raw_config['endpoints'].items(): + # Build the base configuration for this endpoint + indexes = endpoint_config.pop('indexes') + base_config = raw_config['defaults'].copy() # use defaults as the base... + base_config.update(endpoint_config) # then override with endpoint settings + # Add the host to the config + scheme = base_config.pop('scheme') + _id = base_config.pop('id') + rgn = base_config['region'] + base_config['host'] = f'{scheme}://search-{endpoint}-{_id}.{rgn}.es.amazonaws.com' + for dataset, version in indexes.items(): + prod_idx = f'{dataset}_v' + str(version + increment_version) # e.g. arxiv_v1 / v2 + dev_idx = f'{dataset}_dev' + ('0' if increment_version else '') # e.g. arxiv_dev / dev0 + config[endpoint][dataset][True] = {'index': prod_idx, **base_config} # production mode + config[endpoint][dataset][False] = {'index': dev_idx, **base_config} # dev mode + return default_to_regular(config) + + +def setup_es(endpoint, dataset, production, + drop_and_recreate=False, increment_version=False): """Retrieve the ES connection, ES config and setup the index if required. Args: - es_mode (str): One of "prod" or "dev". 
diff --git a/nesta/core/orms/orm_utils.py b/nesta/core/orms/orm_utils.py
index 5b732d3f..555ce841 100644
--- a/nesta/core/orms/orm_utils.py
+++ b/nesta/core/orms/orm_utils.py
@@ -18,8 +18,11 @@
 import pymysql
 import os
 import json
+import yaml
 import logging
 import time
+from collections import defaultdict
+from collections.abc import Mapping
 
 
 def _get_key_value(obj, key):
@@ -93,68 +96,86 @@ def assert_correct_config(test, config, key):
             raise ValueError(f"In test mode the index '{key}' "
                              "must end with '_dev'")
 
+def default_to_regular(d):
+    """Convert nested defaultdicts to nested dicts.
+    This is useful when you want to throw KeyErrors, which
+    would be dynamically accepted otherwise.
+
+    Args:
+        d (nested defaultdict): A nested defaultdict object.
+    Returns:
+        _d (nested dict): A nested dict object.
+    """
+    if isinstance(d, defaultdict):
+        d = {k: default_to_regular(v) for k, v in d.items()}
+    return d
 
-def setup_es(es_mode, test_mode, drop_and_recreate,
-             dataset, aliases=None, increment_version=False):
+
+def parse_es_config(increment_version):
+    """Retrieve the ES config for all endpoints and indexes,
+    including auto-version-incrementing if required.
+
+    Args:
+        increment_version (bool): Move one version up? (NB: no changes to config file on disk)
+    Returns:
+        config: Elasticsearch config dict, for all endpoints and indexes.
+    """
+    raw_config = load_yaml_from_pathstub('config', 'elasticsearch.yaml')
+    config = defaultdict(lambda: defaultdict(dict))
+    for endpoint, endpoint_config in raw_config['endpoints'].items():
+        # Build the base configuration for this endpoint
+        indexes = endpoint_config.pop('indexes')
+        base_config = raw_config['defaults'].copy()  # use defaults as the base...
+        base_config.update(endpoint_config)  # then override with endpoint settings
+        # Add the host to the config
+        scheme = base_config.pop('scheme')
+        _id = base_config.pop('id')
+        rgn = base_config['region']
+        base_config['host'] = f'{scheme}://search-{endpoint}-{_id}.{rgn}.es.amazonaws.com'
+        for dataset, version in indexes.items():
+            prod_idx = f'{dataset}_v' + str(version + increment_version)  # e.g. arxiv_v1 / v2
+            dev_idx = f'{dataset}_dev' + ('0' if increment_version else '')  # e.g. arxiv_dev / dev0
+            config[endpoint][dataset][True] = {'index': prod_idx, **base_config}  # production mode
+            config[endpoint][dataset][False] = {'index': dev_idx, **base_config}  # dev mode
+    return default_to_regular(config)
+
+
+def setup_es(endpoint, dataset, production,
+             drop_and_recreate=False, increment_version=False):
     """Retrieve the ES connection, ES config and setup the index if required.
 
     Args:
-        es_mode (str): One of "prod" or "dev".
-        test_mode (bool): Running in test mode?
-        drop_and_recreate (bool): Drop and recreate ES index?
+        endpoint (str): Name of the AWS ES endpoint.
         dataset (str): Name of the dataset for the ES mapping.
-        aliases (str): Name of the aliases for the ES mapping.
+        production (bool): Running in production mode?
+        drop_and_recreate (bool): Drop and recreate ES index?
         increment_version (bool): Move one version up?
     Returns:
        {es, es_config}: Elasticsearch connection and config dict.
     """
-    if es_mode not in ("prod", "dev"):
-        raise ValueError("es_mode required to be one of "
-                         f"'prod' or 'dev', but '{es_mode}' provided.")
-
-    # Get and check the config
-    key = f"{dataset}_{es_mode}"
-    es_config = get_config('elasticsearch.config', key)
-    assert_correct_config(test_mode, es_config, key)
-
-    # If required, create new index from the old one
-    if increment_version:
-        old_index = es_config['index']
-        if es_mode == 'prod':
-            tag, version = re.findall(r'(\w+)(\d+)', old_index)[0]
-            new_index = f'{tag}{int(version)+1}'
-        else:
-            tag = old_index
-            new_index = f'{old_index}0'
-        es_config['index'] = new_index
-        es_config['old_index'] = old_index
-        if any((new_index == old_index,
-                not old_index.startswith(tag),
-                not new_index.startswith(tag),
-                len(new_index) - len(old_index) > 1)):
-            raise ValueError('Could not create a new valid '
-                             f'index from {old_index}. Tried, '
-                             f'but got {new_index}.')
-
+    es_master_config = parse_es_config(increment_version)
+    es_config = es_master_config[endpoint][dataset][production]
     # Make the ES connection
     es = Elasticsearch(es_config['host'],
                        port=es_config['port'],
                        use_ssl=True,
                        send_get_body_as='POST')
-    # Drop the index if required (must be in test mode to do this)
-    _index = es_config['index']
-    exists = es.indices.exists(index=_index)
-    if drop_and_recreate and test_mode and exists:
-        es.indices.delete(index=_index)
+    # Does the index already exist?
+    index = es_config['index']
+    exists = es.indices.exists(index=index)
+    # Drop index for fresh recreation (if in test mode)
+    if drop_and_recreate and (not production) and exists:
+        es.indices.delete(index=index)
         exists = False
     # Create the index if required
     if not exists:
-        mapping = get_es_mapping(dataset, aliases=aliases)
-        es.indices.create(index=_index, body=mapping)
+        mapping = get_es_mapping(dataset, endpoint)
+        es.indices.create(index=index, body=mapping)
     return es, es_config
 
+
 def get_es_ids(es, es_config, size=1000, query={}):
     '''Get all existing ES document ids for a given config
-
+
     Args:
         es: Elasticsearch connection.
         es_config (dict): Elasticsearch configuration.
@@ -188,45 +209,137 @@ def load_json_from_pathstub(pathstub, filename, sort_on_load=True):
     return js
 
 
-def get_es_mapping(dataset, aliases):
-    '''Get the configuration from a file in the luigi config path
-    directory, and convert the key-value pairs under the config :code:`header`
-    into a `dict`.
+def load_yaml_from_pathstub(pathstub, filename):
+    """Basic wrapper around :obj:`find_filepath_from_pathstub`
+    which also opens the file (assumed to be yaml).
+
+    Args:
+        pathstub (str): Stub of filepath where the file should be found.
+        filename (str): The filename.
+    Returns:
+        The file contents as a json-like object.
+    """
+    _path = find_filepath_from_pathstub(pathstub)
+    _path = os.path.join(_path, filename)
+    with open(_path) as f:
+        return yaml.safe_load(f)
 
-    Parameters:
-        file_name (str): The configuation file name.
-        header (str): The header key in the config file.
 
+def update_nested(original_dict, update_dict):
+    """Update a nested dictionary with another nested dictionary.
+    Has equivalent behaviour to :obj:`dict.update(self, update_dict)`.
+
+    Args:
+        original_dict (dict): The original dictionary to update.
+        update_dict (dict): The dictionary from which to extract updates.
     Returns:
-        :obj:`dict`
-    '''
-    # Get the mapping and lookup
-    mapping = load_json_from_pathstub("core/orms/",
-                                      f"{dataset}_es_config.json")
-    alias_lookup = {}
-    if aliases is not None:
-        alias_lookup = load_json_from_pathstub("tier_1/aliases/",
-                                               f"{aliases}.json")
-    # Get a list of valid fields for verification
-    fields = mapping["mappings"]["_doc"]["properties"].keys()
-    # Add any aliases to the mapping
+        original_dict (dict): The original dictionary after updates.
+    """
+    for k, v in update_dict.items():
+        if isinstance(v, Mapping):  # Mapping ~= any dict-like object
+            original_dict[k] = update_nested(original_dict.get(k, {}), v)
+        else:
+            original_dict[k] = v
+    return original_dict
+
+
+def _get_es_mapping(dataset, endpoint):
+    """Sequentially apply the mappings from the defaults, then the
+    dataset and finally the endpoint. None of these files is strictly
+    required to exist, so an endpoint could conceivably have a dataset
+    unique to itself.
+
+    Args:
+        dataset (str): Name of the dataset for the ES mapping.
+        endpoint (str): Name of the AWS ES endpoint.
+    Returns:
+        :obj:`dict`: The constructed mapping.
+    """
+    mapping = {}
+    for _path, _prefix in [('defaults', 'defaults'),
+                           ('datasets', f'{dataset}_mapping'),
+                           (f'endpoints/{endpoint}', f'{dataset}_mapping')]:
+        try:
+            _mapping = load_json_from_pathstub(f"mappings/{_path}", f"{_prefix}.json")
+        except json.JSONDecodeError as exc:
+            raise ValueError(f'Could not decode "mappings/{_path}/{_prefix}.json"') from exc
+        except FileNotFoundError:
+            continue
+        update_nested(mapping, _mapping)
+    return mapping
+
+
+def _apply_alias(mapping, dataset, endpoint):
+    """Dynamically apply aliases to an Elasticsearch mapping. Note that
+    the mapping is changed in-place.
+
+    Args:
+        mapping (dict): An ES mapping.
+        dataset (str): Name of the dataset for this ES mapping.
+        endpoint (str): Name of the AWS ES endpoint.
+    """
+    ep_path = f"mappings/endpoints/{endpoint}"
+    # Load an alias, if it exists
+    try:
+        alias_lookup = load_json_from_pathstub(ep_path, "aliases.json")
+    except FileNotFoundError:
+        return
+    # Check whether this is a soft or hard alias
+    try:
+        config = load_yaml_from_pathstub(ep_path, "config.yaml")
+        hard_alias = config['hard-alias']
+    except (FileNotFoundError, KeyError):
+        hard_alias = False
+    # Apply the aliases to the mapping properties
+    propts = mapping["mappings"]["_doc"]["properties"]
+    _fields = set()
     for alias, lookup in alias_lookup.items():
         if dataset not in lookup:
             continue
-        # Validate the field
         field = lookup[dataset]
-        if field not in fields:
-            raise ValueError(f"Alias '{alias}' to '{field}' but '{field}'"
-                             "does not exist in the mapping.")
-        # Add the alias to the mapping
-        value = {"type": "alias", "path": lookup[dataset]}
-        mapping["mappings"]["_doc"]["properties"][alias] = value
+        propts[alias] = (propts[field] if hard_alias  # New field same as old for 'hard-alias'
+                         else {"type": "alias", "path": field})  # Otherwise use an ES alias
+        _fields.add(field)
+    # Remove old fields if 'hard alias'
+    if hard_alias:
+        for f in _fields:
+            propts.pop(f)
+
+
+def _prune_nested(mapping):
+    """Recursively remove any fields with null values from
+    a nested dictionary. The input is changed in-place.
+
+    Args:
+        mapping (dict): The dictionary to prune.
+    """
+    for k in list(mapping.keys()):
+        v = mapping[k]
+        if isinstance(v, Mapping):  # Mapping ~= any dict-like
+            _prune_nested(v)
+        elif v is None:
+            mapping.pop(k)
+
+
+def get_es_mapping(dataset, endpoint):
+    '''Load the ES mapping for this dataset and endpoint,
+    including aliases.
+
+    Args:
+        dataset (str): Name of the dataset for the ES mapping.
+        endpoint (str): Name of the AWS ES endpoint.
+    Returns:
+        :obj:`dict`
+    '''
+    mapping = _get_es_mapping(dataset, endpoint)
+    _apply_alias(mapping, dataset, endpoint)
+    _prune_nested(mapping)  # prunes any nested keys with null values
     return mapping
 
 
 def cast_as_sql_python_type(field, data):
     """Cast the data to ensure that it is the python type expected by SQL
-
+
     Args:
         field (SqlAlchemy field): SqlAlchemy field, to cast the data
         data: A data field to be cast
@@ -241,7 +354,7 @@ def cast_as_sql_python_type(field, data):
     return _data
 
 
-def filter_out_duplicates(db_env, section, database,
+def filter_out_duplicates(db_env, section, database,
                           Base, _class, data,
                           low_memory=False):
     """Produce a filtered list of data, exluding duplicates and entries that
@@ -303,7 +416,7 @@ def _filter_out_duplicates(session, Base, _class, data,
 
     # Read all pks if in low_memory mode
     if low_memory and not is_auto_pkey:
-        fields = [getattr(_class, pkey.name)
+        fields = [getattr(_class, pkey.name)
                   for pkey in pkey_cols]
         all_pks = set(session.query(*fields).all())
 
@@ -359,11 +472,11 @@ def insert_data(db_env, section, database, Base,
         :obj:`list` of :obj:`dict` data which could not be imported (optional)
 
     """
-    response = filter_out_duplicates(db_env=db_env,
+    response = filter_out_duplicates(db_env=db_env,
                                      section=section,
                                      database=database,
-                                     Base=Base,
-                                     _class=_class,
+                                     Base=Base,
+                                     _class=_class,
                                      data=data,
                                      low_memory=low_memory)
     objs, existing_objs, failed_objs = response
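The helpers above (update_nested, _get_es_mapping, _prune_nested) give each endpoint a layered mapping: the defaults are applied first, then the dataset mapping, then the endpoint override, and null values prune inherited fields. A self-contained toy walk-through of those merge semantics; the field names are illustrative and the helper is re-implemented here rather than imported from orm_utils:

    from collections.abc import Mapping

    def update_nested(original, update):
        # Recursive dict.update, as in orm_utils.update_nested
        for k, v in update.items():
            if isinstance(v, Mapping):
                original[k] = update_nested(original.get(k, {}), v)
            else:
                original[k] = v
        return original

    defaults = {'mappings': {'_doc': {'dynamic': 'strict', 'properties': {}}}}
    dataset = {'mappings': {'_doc': {'properties': {
        'year_of_article': {'type': 'integer'},
        'fos': {'type': 'keyword'}}}}}
    endpoint = {'mappings': {'_doc': {'properties': {'fos': None}}}}  # null removes 'fos'

    mapping = {}
    for _mapping in (defaults, dataset, endpoint):
        update_nested(mapping, _mapping)

    # Prune null values, mirroring _prune_nested
    props = mapping['mappings']['_doc']['properties']
    mapping['mappings']['_doc']['properties'] = {k: v for k, v in props.items()
                                                 if v is not None}
    print(mapping)
    # {'mappings': {'_doc': {'dynamic': 'strict',
    #                        'properties': {'year_of_article': {'type': 'integer'}}}}}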
diff --git a/nesta/core/orms/tests/test_orm_utils.py b/nesta/core/orms/tests/test_orm_utils.py
index 4defe898..d3130c3c 100644
--- a/nesta/core/orms/tests/test_orm_utils.py
+++ b/nesta/core/orms/tests/test_orm_utils.py
@@ -26,36 +26,8 @@
 from nesta.core.orms.orm_utils import db_session_query
 from nesta.core.orms.orm_utils import cast_as_sql_python_type
 
-@pytest.fixture
-def alias_lookup():
-    return {
-        "alias1": {
-            "dataset1": "field1a",
-            "dataset2": "field1b"
-        },
-        "alias2": {
-            "dataset1": "field2a",
-            "dataset2": "field2b"
-        }
-    }
-
-@pytest.fixture
-def mapping():
-    return {
-        'mappings': {
-            '_doc': {
-                'properties': {
-                    'field1a': {'type': 'keyword'},
-                    'field2a': {'type': 'text'},
-                }
-            }
-        }
-    }
-
 Base = declarative_base()
 
-
 class DummyModel(Base):
     __tablename__ = 'dummy_model'
 
@@ -229,109 +201,89 @@ def test_db_session_query(self):
         assert n_rows == len(parents) == 1000
 
 def test_load_json_from_pathstub():
-    for ds in ["nih", "crunchbase"]:
-        js = load_json_from_pathstub("core/orms/",
-                                     f"{ds}_es_config.json")
+    for ds in ["nih", "companies"]:
+        js = load_json_from_pathstub("datasets/",
+                                     f"{ds}.json")
         assert len(js) > 0
 
-@mock.patch("nesta.core.orms.orm_utils.load_json_from_pathstub")
-def test_get_es_mapping(mocked_load_json_from_pathstub, alias_lookup,
-                        mapping):
-    mocked_load_json_from_pathstub.side_effect = (mapping,
-                                                  alias_lookup)
-    _mapping = get_es_mapping("dataset1", "blah")
-    alias1 = _mapping["mappings"]["_doc"]["properties"].pop("alias1")
-    alias2 = _mapping["mappings"]["_doc"]["properties"].pop("alias2")
-    assert mapping == _mapping
-    assert alias1 == {'type': 'alias', 'path': 'field1a'}
-    assert alias2 == {'type': 'alias', 'path': 'field2a'}
-
-@mock.patch("nesta.core.orms.orm_utils.load_json_from_pathstub")
-def test_get_es_mapping_bad_alias(mocked_load_json_from_pathstub,
-                                  alias_lookup, mapping):
-    mocked_load_json_from_pathstub.side_effect = (mapping,
-                                                  alias_lookup)
-    with pytest.raises(ValueError):
-        get_es_mapping("dataset2", "blah")
-
-@mock.patch("nesta.core.orms.orm_utils.get_config")
-@mock.patch("nesta.core.orms.orm_utils.assert_correct_config")
-@mock.patch("nesta.core.orms.orm_utils.Elasticsearch")
-@mock.patch("nesta.core.orms.orm_utils.get_es_mapping")
-def test_setup_es_bad_es_mode(mock_get_es_mapping, mock_Elasticsearch,
-                              mock_assert_correct_config, mock_get_config):
-    with pytest.raises(ValueError):
-        setup_es(es_mode="dave", test_mode=False, drop_and_recreate=False,
-                 dataset=None, aliases=None)
-
-
-@mock.patch("nesta.core.orms.orm_utils.get_config")
-@mock.patch("nesta.core.orms.orm_utils.assert_correct_config")
-@mock.patch("nesta.core.orms.orm_utils.Elasticsearch")
-@mock.patch("nesta.core.orms.orm_utils.get_es_mapping")
-def test_setup_es_true_test_delete_called(mock_get_es_mapping,
+PATH = "nesta.core.orms.orm_utils.{}"
+@mock.patch(PATH.format("get_config"))
+@mock.patch(PATH.format("assert_correct_config"))
+@mock.patch(PATH.format("Elasticsearch"))
+@mock.patch(PATH.format("get_es_mapping"))
+@mock.patch(PATH.format("parse_es_config"))
+def test_setup_es_true_test_delete_called(mock_parse_es_config,
+                                          mock_get_es_mapping,
                                           mock_Elasticsearch,
                                           mock_assert_correct_config,
                                           mock_get_config):
     mock_Elasticsearch.return_value.indices.exists.return_value = True
-    setup_es(es_mode="dev", test_mode=True, drop_and_recreate=True,
-             dataset=None, aliases=None)
+    setup_es(endpoint='arxlive', dataset='arxiv', production=False,
+             drop_and_recreate=True)
     assert mock_Elasticsearch.return_value.indices.delete.call_count == 1
     assert mock_Elasticsearch.return_value.indices.create.call_count == 1
 
-@mock.patch("nesta.core.orms.orm_utils.get_config")
-@mock.patch("nesta.core.orms.orm_utils.assert_correct_config")
-@mock.patch("nesta.core.orms.orm_utils.Elasticsearch")
-@mock.patch("nesta.core.orms.orm_utils.get_es_mapping")
-def test_setup_es_true_test_delete_not_called_not_exists(mock_get_es_mapping,
+@mock.patch(PATH.format("get_config"))
+@mock.patch(PATH.format("assert_correct_config"))
+@mock.patch(PATH.format("Elasticsearch"))
+@mock.patch(PATH.format("get_es_mapping"))
+@mock.patch(PATH.format("parse_es_config"))
+def test_setup_es_true_test_delete_not_called_not_exists(mock_parse_es_config,
+                                                         mock_get_es_mapping,
                                                          mock_Elasticsearch,
                                                          mock_assert_correct_config,
                                                          mock_get_config):
     mock_Elasticsearch.return_value.indices.exists.return_value = False
-    setup_es(es_mode="dev", test_mode=True, drop_and_recreate=True,
-             dataset=None, aliases=None)
+    setup_es(drop_and_recreate=True, production=False,
+             endpoint='arxlive', dataset='arxiv')
     assert mock_Elasticsearch.return_value.indices.delete.call_count == 0
     assert mock_Elasticsearch.return_value.indices.create.call_count == 1
 
-@mock.patch("nesta.core.orms.orm_utils.get_config")
-@mock.patch("nesta.core.orms.orm_utils.assert_correct_config")
-@mock.patch("nesta.core.orms.orm_utils.Elasticsearch")
-@mock.patch("nesta.core.orms.orm_utils.get_es_mapping")
-def test_setup_es_false_test_delete_not_called(mock_get_es_mapping,
+@mock.patch(PATH.format("get_config"))
+@mock.patch(PATH.format("assert_correct_config"))
+@mock.patch(PATH.format("Elasticsearch"))
+@mock.patch(PATH.format("get_es_mapping"))
+@mock.patch(PATH.format("parse_es_config"))
+def test_setup_es_false_test_delete_not_called(mock_parse_es_config,
+                                               mock_get_es_mapping,
                                                mock_Elasticsearch,
                                                mock_assert_correct_config,
                                                mock_get_config):
     mock_Elasticsearch.return_value.indices.exists.return_value = False
-    setup_es(es_mode="dev", test_mode=False, drop_and_recreate=True,
-             dataset=None, aliases=None)
+    setup_es(drop_and_recreate=True, production=False,
+             endpoint='arxlive', dataset='arxiv')
     assert mock_Elasticsearch.return_value.indices.delete.call_count == 0
     assert mock_Elasticsearch.return_value.indices.create.call_count == 1
 
-@mock.patch("nesta.core.orms.orm_utils.get_config")
-@mock.patch("nesta.core.orms.orm_utils.assert_correct_config")
-@mock.patch("nesta.core.orms.orm_utils.Elasticsearch")
-@mock.patch("nesta.core.orms.orm_utils.get_es_mapping")
-def test_setup_es_false_reindex_delete_not_called(mock_get_es_mapping,
+@mock.patch(PATH.format("get_config"))
+@mock.patch(PATH.format("assert_correct_config"))
+@mock.patch(PATH.format("Elasticsearch"))
+@mock.patch(PATH.format("get_es_mapping"))
+@mock.patch(PATH.format("parse_es_config"))
+def test_setup_es_false_reindex_delete_not_called(mock_parse_es_config,
+                                                  mock_get_es_mapping,
                                                   mock_Elasticsearch,
                                                   mock_assert_correct_config,
                                                   mock_get_config):
     mock_Elasticsearch.return_value.indices.exists.return_value = False
-    setup_es(es_mode="dev", test_mode=True, drop_and_recreate=False,
-             dataset=None, aliases=None)
+    setup_es(drop_and_recreate=False, production=False,
+             endpoint='arxlive', dataset='arxiv')
     assert mock_Elasticsearch.return_value.indices.delete.call_count == 0
     assert mock_Elasticsearch.return_value.indices.create.call_count == 1
 
-@mock.patch("nesta.core.orms.orm_utils.get_config")
-@mock.patch("nesta.core.orms.orm_utils.assert_correct_config")
-@mock.patch("nesta.core.orms.orm_utils.Elasticsearch")
-@mock.patch("nesta.core.orms.orm_utils.get_es_mapping")
-def test_setup_es_no_create_if_exists(mock_get_es_mapping,
+@mock.patch(PATH.format("get_config"))
+@mock.patch(PATH.format("assert_correct_config"))
+@mock.patch(PATH.format("Elasticsearch"))
+@mock.patch(PATH.format("get_es_mapping"))
+@mock.patch(PATH.format("parse_es_config"))
+def test_setup_es_no_create_if_exists(mock_parse_es_config,
+                                      mock_get_es_mapping,
                                       mock_Elasticsearch,
                                       mock_assert_correct_config,
                                       mock_get_config):
     mock_Elasticsearch.return_value.indices.exists.return_value = True
-    setup_es(es_mode="dev", test_mode=True, drop_and_recreate=False,
-             dataset=None, aliases=None)
+    setup_es(drop_and_recreate=False, production=False,
+             endpoint='arxlive', dataset='arxiv')
     assert mock_Elasticsearch.return_value.indices.delete.call_count == 0
     assert mock_Elasticsearch.return_value.indices.create.call_count == 0
 
@@ -392,9 +344,8 @@ def test_merge_metadata_with_three_bases(primary_base, secondary_base, tertiary_
                                 'second_table', 'third_table']
 
-@mock.patch("nesta.core.orms.orm_utils.scan",
-            return_value=[{'_id':1},{'_id':1},
-                          {'_id':22.3},{'_id':3.3}]*134)
+@mock.patch(PATH.format("scan"), return_value=[{'_id':1},{'_id':1},
+                                               {'_id':22.3},{'_id':3.3}]*134)
 def test_get_es_ids(mocked_scan):
     ids = get_es_ids(mock.MagicMock(), mock.MagicMock())
     assert ids == {1, 22.3, 3.3}
setup_es(es_mode, self.test, - drop_and_recreate=False, + es, es_config = setup_es(endpoint=self.dataset, dataset=self.dataset, + production=not self.test, + drop_and_recreate=False, increment_version=False) field = "terms_tokens_article" ids = get_es_ids(es, es_config, size=10000, diff --git a/nesta/core/routines/arxiv/arxiv_lolvelty.py b/nesta/core/routines/arxiv/arxiv_lolvelty.py index b432d9c7..7579b17e 100644 --- a/nesta/core/routines/arxiv/arxiv_lolvelty.py +++ b/nesta/core/routines/arxiv/arxiv_lolvelty.py @@ -25,10 +25,10 @@ class ArxivElasticsearchTask(ElasticsearchTask): grid_task_kwargs = DictParameterPlus(default={}) def done_ids(self): - es_mode = 'dev' if self.test else 'prod' - es, es_config = setup_es(es_mode, self.test, - drop_and_recreate=False, + es, es_config = setup_es(endpoint=self.endpoint, dataset=self.dataset, + production=not self.test, + drop_and_recreate=False, increment_version=False) field = "metric_novelty_article" ids = get_es_ids(es, es_config, size=10000, @@ -42,6 +42,7 @@ def requires(self): process_batch_size=10000, drop_and_recreate=self.drop_and_recreate, dataset='arxiv', + endpoint='arxlive', id_field=Article.id, filter=Article.article_source == 'arxiv', entity_type='article', @@ -54,8 +55,7 @@ def requires(self): env_files=[f3p('nesta/'), f3p('config/' 'mysqldb.config'), - f3p('schema_transformations/' 'arxiv.json'), + f3p('datasets/arxiv.json'), f3p('config/' 'elasticsearch.config')], job_def='py36_amzn1_image', @@ -86,6 +86,7 @@ def requires(self): test=test, index=index, dataset='arxiv', + endpoint='arxlive', entity_type='article', kwargs=kwargs, batchable=f3p("batchables/novelty" diff --git a/nesta/core/routines/arxiv/arxiv_root_task.py b/nesta/core/routines/arxiv/arxiv_root_task.py index 73c2f0dd..e15328f3 100644 --- a/nesta/core/routines/arxiv/arxiv_root_task.py +++ b/nesta/core/routines/arxiv/arxiv_root_task.py @@ -66,19 +66,14 @@ def requires(self): test = not self.production routine_id = f"ArxivLolveltyTask-{self.date}-{test}" - # Elasticsearch setup - dataset = 'arxiv' - _, es_config = setup_es('prod' if self.production else 'dev', - not self.production, - self.drop_and_recreate, - dataset=dataset) yield ArxivElasticsearchTask(date=self.date, process_batch_size=1000, routine_id=routine_id, grid_task_kwargs=grid_task_kwargs, test=not self.production, - index=es_config['index'], + drop_and_recreate=self.drop_and_recreate, dataset='arxiv', + endpoint='arxlive', entity_type='article', kwargs=kwargs, batchable=f3p("batchables/novelty" @@ -134,6 +129,7 @@ def requires(self): process_batch_size=10000, drop_and_recreate=self.drop_and_recreate, dataset='arxiv', + endpoint='arxlive', id_field=Article.id, entity_type='article', db_config_env='MYSQLDB', @@ -145,8 +141,7 @@ def requires(self): env_files=[f3p('nesta/'), f3p('config/' 'mysqldb.config'), - f3p('schema_transformations/' 'arxiv.json'), + f3p('datasets/arxiv.json'), f3p('config/' 'elasticsearch.config')], job_def='py36_amzn1_image', diff --git a/nesta/core/routines/crunchbase/crunchbase_elasticsearch_task.py b/nesta/core/routines/crunchbase/crunchbase_elasticsearch_task.py index df9d9af6..6ecebac3 100644 --- a/nesta/core/routines/crunchbase/crunchbase_elasticsearch_task.py +++ b/nesta/core/routines/crunchbase/crunchbase_elasticsearch_task.py @@ -78,10 +78,10 @@ def prepare(self): self.database) # Elasticsearch setup - es_mode = 'dev' if self.test else 'prod' - es, es_config = setup_es(es_mode, self.test, self.drop_and_recreate, - dataset='crunchbase', -
aliases='health_scanner') + es, es_config = setup_es(endpoint='health-scanner', + dataset='companies', + production=not self.test, + drop_and_recreate=self.drop_and_recreate) # Get set of existing ids from elasticsearch via scroll scanner = scan(es, query={"_source": False}, diff --git a/nesta/core/routines/crunchbase/crunchbase_lolvelty.py b/nesta/core/routines/crunchbase/crunchbase_lolvelty.py index 236e2e5e..621c569b 100644 --- a/nesta/core/routines/crunchbase/crunchbase_lolvelty.py +++ b/nesta/core/routines/crunchbase/crunchbase_lolvelty.py @@ -37,7 +37,8 @@ def requires(self): return LazyElasticsearchTask(routine_id=routine_id, test=test, index=index, - dataset='crunchbase', + dataset='companies', + endpoint='health-scanner', entity_type='company', kwargs=kwargs, batchable=f3p("batchables/novelty/lolvelty"), diff --git a/nesta/core/routines/crunchbase/crunchbase_root_task.py b/nesta/core/routines/crunchbase/crunchbase_root_task.py index 7eb54db5..d46c864b 100644 --- a/nesta/core/routines/crunchbase/crunchbase_root_task.py +++ b/nesta/core/routines/crunchbase/crunchbase_root_task.py @@ -45,7 +45,7 @@ def requires(self): batchable=f3p("core/batchables/crunchbase/crunchbase_elasticsearch"), env_files=[f3p("nesta/"), f3p("config/mysqldb.config"), - f3p("schema_transformations/crunchbase_organisation_members.json"), + f3p("datasets/companies.json"), f3p("config/elasticsearch.config")], job_def="py36_amzn1_image", job_name=f"CrunchBaseElasticsearchTask-{_routine_id}", diff --git a/nesta/core/routines/eurito_es/es_root.py b/nesta/core/routines/eurito_es/es_root.py index f4c2bd69..90ee0de4 100644 --- a/nesta/core/routines/eurito_es/es_root.py +++ b/nesta/core/routines/eurito_es/es_root.py @@ -22,10 +22,11 @@ def kwarg_maker(dataset, routine_id): env_files=[f3p('config/mysqldb.config'), f3p('config/elasticsearch.config'), - f3p('schema_transformations/eurito/'), + f3p(f'tier_1/datasets/{dataset}.json'), f3p('nesta')] batchable=f3p(f'batchables/eurito/{dataset}_eu') - return dict(dataset=f'{dataset}-eu', + return dict(dataset=dataset, + endpoint='eurito-dev', routine_id=f'{dataset}-eu_{routine_id}', env_files=env_files, batchable=batchable) @@ -56,7 +57,7 @@ def requires(self): intermediate_bucket=S3_BUCKET) params = (('arxiv', 'article', Article.id), - ('crunchbase', 'company', Organization.id), + ('companies', 'company', Organization.id), ('patstat', 'patent', ApplnFamily.docdb_family_id), ('cordis', 'project', Project.rcn),) for dataset, entity_type, id_field in params: diff --git a/nesta/core/routines/health_data/nih_data/nih_abstracts_mesh_task.py b/nesta/core/routines/health_data/nih_data/nih_abstracts_mesh_task.py index 40046baf..0da21aad 100644 --- a/nesta/core/routines/health_data/nih_data/nih_abstracts_mesh_task.py +++ b/nesta/core/routines/health_data/nih_data/nih_abstracts_mesh_task.py @@ -134,11 +134,10 @@ def prepare(self): db = 'production' if not self.test else 'dev' # elasticsearch setup - es_mode = 'dev' if self.test else 'prod' - es, es_config = setup_es(es_mode, self.test, - drop_and_recreate=False, + es, es_config = setup_es(endpoint='health-scanner', dataset='nih', - aliases='health_scanner') + production=not self.test, + drop_and_recreate=False) # s3 setup and file key collection bucket = 'innovation-mapping-general' diff --git a/nesta/core/routines/health_data/nih_data/nih_dedupe_task.py b/nesta/core/routines/health_data/nih_data/nih_dedupe_task.py index cd1038b3..f2e53a8a 100644 --- a/nesta/core/routines/health_data/nih_data/nih_dedupe_task.py +++ 
b/nesta/core/routines/health_data/nih_data/nih_dedupe_task.py @@ -32,7 +32,7 @@ class DedupeTask(autobatch.AutoBatchTask): def output(self): '''Points to the output database engine''' - db_config = get_config(self.db_config_path, + db_config = get_config(self.db_config_path, "mysqldb") db_config["database"] = ('dev' if self.test else 'production') @@ -68,33 +68,30 @@ def prepare(self): f"{self.process_batch_size}" " while in test mode") - es_mode = 'dev' if self.test else 'prod' - es, es_config = setup_es(es_mode, self.test, - self.drop_and_recreate, - dataset='nih', - aliases='health_scanner', - increment_version=True) + es_kwargs = dict(endpoint='health-scanner', + dataset='nih', production=not self.test) + _, _old_config = setup_es(**es_kwargs) + es, es_config = setup_es(drop_and_recreate=self.drop_and_recreate, + increment_version=True, **es_kwargs) # Count articles from the old index - _old_config = es_config.copy() - _old_config['index'] = es_config['old_index'] logging.info(f"Collected article IDs...") _ids = get_es_ids(es, _old_config, size=10000) logging.info(f"Collected {len(_ids)} IDs") done_ids = get_es_ids(es, es_config, size=10000) # Generate the job params - job_params = [] + job_params = [] batches = split_batches(_ids, self.process_batch_size) for count, batch in enumerate(batches, 1): # Magical '0.3' is the lower end of the deduplication # fraction found by inspection - done = sum(_id in done_ids + done = sum(_id in done_ids for _id in batch) / len(batch) > 0.3 # write batch of ids to s3 batch_file = '' if not done: - batch_file = put_s3_batch(batch, + batch_file = put_s3_batch(batch, self.intermediate_bucket, self.routine_id) params = { @@ -105,13 +102,13 @@ def prepare(self): 'outinfo': es_config['host'], 'out_port': es_config['port'], 'out_index': es_config['index'], - 'in_index': es_config['old_index'], + 'in_index': _old_config['index'], 'out_type': es_config['type'], 'aws_auth_region': es_config['region'], 'entity_type': 'paper', 'test': self.test, 'routine_id': self.routine_id - } + } job_params.append(params) if self.test and count > 1: diff --git a/nesta/core/routines/health_data/nih_data/nih_lolvelty.py b/nesta/core/routines/health_data/nih_data/nih_lolvelty.py index edb4a98c..f42555a5 100644 --- a/nesta/core/routines/health_data/nih_data/nih_lolvelty.py +++ b/nesta/core/routines/health_data/nih_data/nih_lolvelty.py @@ -24,6 +24,7 @@ def requires(self): test=test, index=index, dataset='nih', + endpoint='health-scanner', entity_type='paper', kwargs=kwargs, batchable=f3p("batchables/novelty/lolvelty"), diff --git a/nesta/core/routines/health_data/nih_data/nih_process_task.py b/nesta/core/routines/health_data/nih_data/nih_process_task.py index 7e3cf64a..6b58cf1e 100644 --- a/nesta/core/routines/health_data/nih_data/nih_process_task.py +++ b/nesta/core/routines/health_data/nih_data/nih_process_task.py @@ -102,10 +102,10 @@ def prepare(self): project_query = session.query(Projects) # elasticsearch setup - es_mode = 'dev' if self.test else 'prod' - es, es_config = setup_es(es_mode, self.test, self.drop_and_recreate, + es, es_config = setup_es(endpoint='health-scanner', dataset='nih', - aliases='health_scanner') + production=not self.test, + drop_and_recreate=self.drop_and_recreate) batches = self.batch_limits(project_query, BATCH_SIZE) job_params = [] diff --git a/nesta/core/routines/meetup/health_tagging/health_meetup_es_task.py b/nesta/core/routines/meetup/health_tagging/health_meetup_es_task.py index 2a60d862..315bb7e4 100644 --- 
a/nesta/core/routines/meetup/health_tagging/health_meetup_es_task.py +++ b/nesta/core/routines/meetup/health_tagging/health_meetup_es_task.py @@ -72,8 +72,8 @@ def requires(self): date=self.date, process_batch_size=100, drop_and_recreate=self.drop_and_recreate, - aliases='health_scanner', dataset='meetup', + endpoint='health-scanner', id_field=Group.id, entity_type='meetup', core_categories=self.core_categories, @@ -85,7 +85,7 @@ def requires(self): batchable=f3p("batchables/meetup/topic_tag_elasticsearch"), env_files=[f3p("nesta/"), f3p("config/mysqldb.config"), - f3p("schema_transformations/meetup.json"), + f3p("datasets/meetup.json"), f3p("config/elasticsearch.config")], job_def="py36_amzn1_image", job_name=f"MeetupHealthSql2EsTask-{routine_id}", diff --git a/nesta/core/routines/meetup/health_tagging/meetup_lolvelty.py b/nesta/core/routines/meetup/health_tagging/meetup_lolvelty.py index 7fd57c08..10528b5e 100644 --- a/nesta/core/routines/meetup/health_tagging/meetup_lolvelty.py +++ b/nesta/core/routines/meetup/health_tagging/meetup_lolvelty.py @@ -35,6 +35,7 @@ def requires(self): test=test, index=index, dataset='meetup', + endpoint='health-scanner', entity_type='meetup', kwargs=kwargs, batchable=f3p("batchables/novelty/lolvelty"), diff --git a/nesta/core/schemas/README.rst b/nesta/core/schemas/README.rst index f61af8e6..1498c7a3 100644 --- a/nesta/core/schemas/README.rst +++ b/nesta/core/schemas/README.rst @@ -21,6 +21,7 @@ Valid examples are :code:`date_start_project` and :code:`title_of_project`. Tier 0 fields are implicitly excluded from tier 1 if they are missing from the :code:`schema_transformation` file. Tier 1 schema field names are applied via `nesta.packages.decorators.schema_transform` + Tier 2 ------ diff --git a/nesta/core/schemas/tier_1/datasets/arxiv.json b/nesta/core/schemas/tier_1/datasets/arxiv.json new file mode 100644 index 00000000..e3952667 --- /dev/null +++ b/nesta/core/schemas/tier_1/datasets/arxiv.json @@ -0,0 +1,30 @@ +{ + "entity_type": "article", + "tier0_to_tier1": { + "_fields_of_study": "terms_fieldsOfStudy_article", + "abstract": "textBody_abstract_article", + "authors": "terms_authors_article", + "categories": "terms_category_article", + "citation_count": "count_citations_article", + "countries": "terms_countries_article", + "created": "date_created_article", + "doi": "id_digitalObjectIdentifier_article", + "fields_of_study": "json_fieldsOfStudy_article", + "has_multinational": "booleanFlag_multinational_article", + "id": "id_of_article", + "institutes": "terms_institutes_article", + "is_eu": "booleanFlag_eu_article", + "nested_categories": "json_category_article", + "nested_location": "json_location_article", + "normalised_citation": "metric_citations_article", + "novelty_of_article": "metric_novelty_article", + "nuts_0": "terms_nuts0_article", + "nuts_1": "terms_nuts1_article", + "nuts_2": "terms_nuts2_article", + "nuts_3": "terms_nuts3_article", + "regions": "terms_regions_article", + "title": "title_of_article", + "tokens": "terms_tokens_article", + "year": "year_of_article" + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/datasets/companies.json b/nesta/core/schemas/tier_1/datasets/companies.json new file mode 100644 index 00000000..7374c4fa --- /dev/null +++ b/nesta/core/schemas/tier_1/datasets/companies.json @@ -0,0 +1,49 @@ +{ + "entity_type": "company", + "tier0_to_tier1": { + "_booleanFlag_autotranslated_entity": "booleanFlag_autotranslated_entity", + "_rank_rhodonite_organisation": "rank_rhodonite_organisation", +
"_terms_iso2lang_entity": "terms_iso2lang_entity", + "_terms_of_countryTags": "terms_of_countryTags", + "_total_cost_usd2018": "_cost_usd2018_organisation", + "address": "address_of_organisation", + "aliases": "terms_alias_organisation", + "category_group_list": "terms_category_organisation", + "category_list": "terms_subcategory_organisation", + "cb_url": "url_crunchBase_organisation", + "city": "placeName_city_organisation", + "closed_on": "date_death_organisation", + "company_name": "name_of_organisation", + "continent": "id_of_continent", + "coordinates": "coordinate_of_city", + "country": "placeName_country_organisation", + "country_alpha_2": "id_iso2_country", + "country_alpha_3": "id_iso3_country", + "country_numeric": "id_isoNumeric_country", + "currency_of_funding": "currency_of_funding", + "employee_count": "count_employee_organisation", + "facebook_url": "url_facebook_organisation", + "founded_on": "date_birth_organisation", + "funding_rounds": "count_rounds_funding", + "funding_total_usd": "cost_of_funding", + "homepage_url": "url_of_organisation", + "investor_names": "terms_of_funders", + "is_eu": "booleanFlag_eu_organisation", + "is_health": "booleanFlag_health_organisation", + "last_funding_on": "date_last_funding", + "linkedin_url": "url_linkedIn_organisation", + "long_description": "textBody_descriptive_organisation", + "mesh_terms": "terms_mesh_description", + "parent_id": "id_parent_organisation", + "placeName_continent_organisation": "placeName_continent_organisation", + "placeName_state_organisation": "placeName_state_organisation", + "primary_role": "type_of_organisation", + "region": "placeName_region_organisation", + "roles": "terms_roles_organisation", + "short_description": "textBody_summary_organisation", + "state_code": "id_state_organisation", + "status": "status_of_organisation", + "twitter_url": "url_twitter_organisation", + "updated_at": "date_updated_organisation" + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/datasets/cordis.json b/nesta/core/schemas/tier_1/datasets/cordis.json new file mode 100644 index 00000000..b27e608c --- /dev/null +++ b/nesta/core/schemas/tier_1/datasets/cordis.json @@ -0,0 +1,16 @@ +{ + "entity_type": "project", + "tier0_to_tier1": { + "description": "textBody_description_project", + "ec_contribution": "cost_ecFunding_project", + "end_date_code": "date_ended_project", + "framework": "name_framework_project", + "link": "url_of_project", + "rcn": "id_of_project", + "start_date_code": "date_started_project", + "status": "status_of_project", + "title": "title_of_project", + "total_cost": "cost_total_project", + "year": "year_of_project" + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/datasets/meetup.json b/nesta/core/schemas/tier_1/datasets/meetup.json new file mode 100644 index 00000000..6b269458 --- /dev/null +++ b/nesta/core/schemas/tier_1/datasets/meetup.json @@ -0,0 +1,29 @@ +{ + "entity_type": "meetup", + "tier0_to_tier1": { + "_booleanFlag_autotranslated_entity": "booleanFlag_autotranslated_entity", + "_placeName_state_group": "_placeName_state_group", + "_rank_rhodonite_group": "rank_rhodonite_group", + "_terms_iso2lang_entity": "terms_iso2lang_entity", + "_terms_of_countryTags": "terms_of_countryTags", + "category_name": "name_of_category", + "city": "placeName_city_group", + "continent": "placeName_continent_group", + "continent_id": "id_continent_group", + "coordinate": "coordinate_of_group", + "country": "id_iso2_country", + "country_id": "id_country_group", + 
"country_name": "placeName_country_group", + "created": "date_start_group", + "description": "textBody_descriptive_group", + "id": "id_of_group", + "iso3": "id_iso3_country", + "isoNumeric": "id_isoNumeric_country", + "member_origins": "terms_memberOrigin_group", + "members": "count_member_group", + "mesh_terms": "terms_mesh_group", + "name": "name_of_group", + "topics": "terms_topics_group", + "urlname": "url_of_group" + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/datasets/nih.json b/nesta/core/schemas/tier_1/datasets/nih.json new file mode 100644 index 00000000..bfda383b --- /dev/null +++ b/nesta/core/schemas/tier_1/datasets/nih.json @@ -0,0 +1,35 @@ +{ + "entity_type": "paper", + "tier0_to_tier1": { + "_booleanFlag_autotranslated_entity": "booleanFlag_autotranslated_entity", + "_rank_rhodonite_abstract": "rank_rhodonite_abstract", + "_terms_iso2lang_entity": "terms_iso2lang_entity", + "_terms_of_countryTags": "terms_of_countryTags", + "_terms_of_funders": "terms_of_funders", + "_total_cost_usd2018": "_cost_usd2018_project", + "abstract_text": "textBody_abstract_project", + "city": "placeName_city_organisation", + "continent": "id_of_continent", + "coordinates": "coordinate_of_organisation", + "country": "placeName_country_organisation", + "country_alpha_2": "id_iso2_country", + "country_alpha_3": "id_iso3_country", + "country_numeric": "id_isoNumeric_country", + "duplicate_abstract": "booleanFlag_duplicate_abstract", + "full_project_num": "id_of_project", + "fy": "year_fiscal_funding", + "mesh_terms": "terms_mesh_abstract", + "org_name": "title_of_organisation", + "org_state": "id_state_organisation", + "org_zipcode": "placeName_zipcode_organisation", + "phr": "textBody_descriptive_project", + "placeName_continent_organisation": "placeName_continent_organisation", + "placeName_state_organisation": "placeName_state_organisation", + "project_end": "date_end_project", + "project_start": "date_start_project", + "project_terms": "terms_descriptive_project", + "project_title": "title_of_project", + "total_cost": "cost_total_project", + "total_cost_currency": "currency_total_cost" + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/datasets/patstat.json b/nesta/core/schemas/tier_1/datasets/patstat.json new file mode 100644 index 00000000..96bfbfc3 --- /dev/null +++ b/nesta/core/schemas/tier_1/datasets/patstat.json @@ -0,0 +1,18 @@ +{ + "entity_type": "patent", + "tier0_to_tier1": { + "abstract": "textBody_abstract_patent", + "appln_auth": "terms_authCountry_patent", + "ctry": "terms_personCountry_patent", + "earliest_filing_date": "date_of_patent", + "earliest_filing_year": "year_of_patent", + "id": "id_family_patent", + "ipc": "terms_ipc_patent", + "is_eu": "booleanFlag_eu_patent", + "nace2": "terms_nace2_patent", + "nb_citing_docdb_fam": "count_citations_patent", + "nuts": "terms_personNuts_patent", + "tech": "terms_techFieldNumber_patent", + "title": "title_of_patent" + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/README.rst b/nesta/core/schemas/tier_1/mappings/README.rst new file mode 100644 index 00000000..d9b98a11 --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/README.rst @@ -0,0 +1,148 @@ +Elasticsearch mappings +====================== + +Our methodology for constructing Elasticsearch mappings is described here. It is intended to minimise duplication of efforts and enforce standardisation when referring to a common dataset whilst being flexible to individual project needs. 
It is implied in our framework that a single :code:`dataset` can be used across many projects, and each project is mapped to a single :code:`endpoint`. It is useful to start by looking at the structure of the :code:`nesta/core/schemas/tier_1/mappings/` directory: + +.. code-block:: bash + + . + ├── datasets + │ ├── arxiv_mapping.json + │ ├── companies_mapping.json + │ ├── cordis_mapping.json + │ ├── gtr_mapping.json + │ ├── meetup_mapping.json + │ ├── nih_mapping.json + │ └── patstat_mapping.json + ├── defaults + │ └── defaults.json + └── endpoints + ├── arxlive + │ └── arxiv_mapping.json + ├── eurito + │ ├── arxiv_mapping.json + │ ├── companies_mapping.json + │ └── patstat_mapping.json + └── health-scanner + ├── aliases.json + ├── config.yaml + ├── nih_mapping.json + └── nulls.json + +Firstly we consider :code:`defaults/defaults.json` which should contain all default fields for all mappings - for example standard analyzers and dynamic strictness. We might also consider putting global fields there. + +Next consider the :code:`datasets` subdirectory. Each mapping file in here should contain the complete :code:`mappings` field for the respective dataset. The naming convention :code:`<dataset>_mapping.json` is a hard requirement, as :code:`<dataset>` will map to the index for this :code:`dataset` at any given :code:`endpoint`. + +Finally consider the :code:`endpoints` subdirectory. Each sub-subdirectory here should map to any :code:`endpoint` which requires changes beyond the :code:`defaults` and :code:`datasets` mappings. Each mapping file within each :code:`endpoint` sub-subdirectory (e.g. :code:`arxlive` or :code:`health-scanner`) should satisfy the same naming convention (:code:`<dataset>_mapping.json`). All conventions here are also consistent with the :code:`elasticsearch.yaml` configuration file (to see this configuration, you will need to clone the repo and follow `these steps `_ to decrypt the config), which looks a little like this: + + +.. code-block:: yaml + + ## The following assumes the AWS host endpoint naming convention: + ## {scheme}://search-{endpoint}-{id}.{region}.es.amazonaws.com + defaults: + scheme: https + port: 443 + region: eu-west-2 + type: _doc + endpoints: + # ------------------------------- + # <endpoint name>: + # id: <endpoint id> + # <default override>: <value> ## e.g.: scheme, port, region, _type + # indexes: + # <dataset>: <version> ## Note: defaults to <dataset>_dev in testing mode + # ------------------------------- + arxlive: + id: <endpoint id> + indexes: + arxiv: 4 + # ------------------------------- + health-scanner: + id: <endpoint id> + indexes: + nih: 6 + companies: 5 + meetup: 4 + ... etc ... + +Note that for the :code:`health-scanner` endpoint, :code:`companies` and :code:`meetup` will be generated from the :code:`datasets` mappings, as they are not specified under the :code:`endpoints/health-scanner` subdirectory. Also note that :code:`endpoints` sub-directories do not need to exist for each :code:`endpoint` to be generated: the mappings will simply be generated from the dataset defaults. For example, a new endpoint :code:`general` can be generated from the DAPS codebase using the above, even though there is no :code:`endpoints/general` sub-subdirectory. + +Individual :code:`endpoints` can also specify :code:`aliases.json` which harmonises field names across datasets for specific endpoints. This uses a convention as follows: + +.. code-block:: python + + { + #...the convention is... + "<alias>": { + "<dataset 1>": "<field name in dataset 1>", + "<dataset 2>": "<field name in dataset 2>", + "<dataset 3>": "<field name in dataset 3>" + }, + #...an example is...
+ "city": { + "companies": "placeName_city_organisation", + "meetup": "placeName_city_group", + "nih": "placeName_city_organisation" + }, + #...etc...# + } + +By default, this applies (what Joel calls) a "soft" alias, which is an `Elasticsearch alias `_, however by specifying :code:`hard-alias=true` in :code:`config.yaml` (see :code:`health-scanner` above), the alias is instead applied directly (i.e. field names are physically replaced, not aliased). + +You will also notice the :code:`nulls.json` file in the :code:`health-scanner` endpoint. This is a relatively experimental feature for automatically nullifying values on ingestion through ElasticsearchPlus, in lieu of proper exploratory data analysis. The logic and format for this `is documented here `_. + +Mapping construction hierarchy +------------------------------ + +Each mapping is constructed by overriding nested fields using the :code:`defaults` :code:`datasets` and :code:`endpoints`, in that order (i.e. :code:`endpoints` override nested fields in :code:`datasets`, and :code:`datasets` override those in :code:`defaults`). If you would like to "switch off" a field from the :code:`defaults` or :code:`datasets` mappings, you should set the value of the nested field to :code:`null`. For example: + +.. code-block:: javascript + + { + "mappings": { + "_doc": { + "dynamic": "strict", + "properties": { + "placeName_zipcode_organisation": null + } + } + } + } + +will simply "switch off" the field :code:`placeName_zipcode_organisation`, which was specified in :code:`datasets`. + +The logic for the mapping construction hierarchy is demonstrated in the respective :code:`orms.orm_utils.get_es_mapping` function: + + +.. code-block:: python + + def get_es_mapping(dataset, endpoint): + '''Load the ES mapping for this dataset and endpoint, + including aliases. + + Args: + dataset (str): Name of the dataset for the ES mapping. + endpoint (str): Name of the AWS ES endpoint. + Returns: + :obj:`dict` + ''' + mapping = _get_es_mapping(dataset, endpoint) + _apply_alias(mapping, dataset, endpoint) + _prune_nested(mapping) # prunes any nested keys with null values + return mapping + +Integrated tests +---------------- + +The following :code:`pytest` tests are made (and triggered on PR via travis): + +- :code:`aliases.json` files are checked for consistency with available :code:`datasets`. +- All mappings for each in :code:`datasets` and :code:`endpoints` are fully generated, and tested for compatibility with the schema transformations (which are, in turn, checked against the valid ontology in :code:`ontology.json`). + +Features in DAPS2 +----------------- + +- The index version (e.g. :code:`'arxiv': 4` in :code:`elasticsearch.yaml`) will be automatically generated from semantic versioning and the git hash in DAPS2, therefore the :code:`indexes` field will consolidate to an itemised list of indexes. +- The mappings under :code:`datasets` will be automatically generated from the open ontology which will be baked into the tier-0 schemas. This will render :code:`schema_transformations` redundant. +- Elasticsearch components will be factored out of :code:`orm_utils`. 
diff --git a/nesta/core/schemas/tier_1/mappings/datasets/arxiv_mapping.json b/nesta/core/schemas/tier_1/mappings/datasets/arxiv_mapping.json new file mode 100644 index 00000000..401d1bf3 --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/datasets/arxiv_mapping.json @@ -0,0 +1,65 @@ +{ + "mappings": { + "_doc": { + "properties": { + "booleanFlag_multinational_article": { + "type": "boolean" + }, + "count_citations_article": { + "type": "integer" + }, + "id_digitalObjectIdentifier_article": { + "type": "keyword" + }, + "metric_novelty_article": { + "type": "float" + }, + "terms_authors_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "terms_institutes_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "textBody_abstract_article": { + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "title_of_article": { + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "url_of_article": { + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "year_of_article": { + "type": "integer" + } + } + } + } +} \ No newline at end of file diff --git a/nesta/core/orms/crunchbase_es_config.json b/nesta/core/schemas/tier_1/mappings/datasets/companies_mapping.json similarity index 89% rename from nesta/core/orms/crunchbase_es_config.json rename to nesta/core/schemas/tier_1/mappings/datasets/companies_mapping.json index 0d3362f1..ccf22c71 100644 --- a/nesta/core/orms/crunchbase_es_config.json +++ b/nesta/core/schemas/tier_1/mappings/datasets/companies_mapping.json @@ -1,7 +1,6 @@ { "mappings": { "_doc": { - "dynamic": "strict", "properties": { "_cost_usd2018_organisation": { "type": "float" @@ -42,8 +41,8 @@ "format": "yyyy-MM-dd", "type": "date" }, - "datetime_updated_organisation": { - "format": "yyyy-MM-dd HH:mm:ss", + "date_updated_organisation": { + "format": "yyyy-MM-dd", "type": "date" }, "id_continent_organisation": { @@ -128,7 +127,7 @@ "type": "keyword" }, "terms_mesh_description": { - "analyzer": "mesh_terms_analyzer", + "analyzer": "terms_analyzer", "fields": { "keyword": { "type": "keyword" @@ -140,7 +139,7 @@ "type": "keyword" }, "terms_of_funders": { - "analyzer": "mesh_terms_analyzer", + "analyzer": "terms_analyzer", "fields": { "keyword": { "type": "keyword" @@ -149,7 +148,7 @@ "type": "text" }, "terms_roles_organisation": { - "analyzer": "mesh_terms_analyzer", + "analyzer": "terms_analyzer", "fields": { "keyword": { "type": "keyword" @@ -158,7 +157,7 @@ "type": "text" }, "terms_subcategory_organisation": { - "analyzer": "mesh_terms_analyzer", + "analyzer": "terms_analyzer", "fields": { "keyword": { "type": "keyword" @@ -166,6 +165,9 @@ }, "type": "text" }, + "terms_tokens_entity": { + "type": "keyword" + }, "textBody_descriptive_organisation": { "fields": { "keyword": { @@ -230,24 +232,5 @@ } } } - }, - "settings": { - "analysis": { - "analyzer": { - "mesh_terms_analyzer": { - "filter": [ - "standard", - "lowercase", - "stop" - ], - "tokenizer": "standard", - "type": "custom" - } - } - }, - "index": { - "number_of_replicas": "1", - "number_of_shards": "5" - } } -} +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/datasets/cordis_mapping.json b/nesta/core/schemas/tier_1/mappings/datasets/cordis_mapping.json new file mode 100644 index 00000000..e0d6685f --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/datasets/cordis_mapping.json 
@@ -0,0 +1,68 @@ +{ + "mappings": { + "dynamic": "strict", + "properties": { + "cost_ecFunding_project": { + "type": "integer" + }, + "cost_total_project": { + "type": "integer" + }, + "date_ended_project": { + "format": "yyyy-MM-dd", + "type": "date" + }, + "date_started_project": { + "format": "yyyy-MM-dd", + "type": "date" + }, + "metric_novelty_project": { + "type": "float" + }, + "name_framework_project": { + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "status_of_project": { + "type": "keyword" + }, + "terms_tokens_entity": { + "type": "keyword" + }, + "textBody_description_project": { + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "title_of_project": { + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "type_of_entity": { + "type": "keyword" + }, + "url_of_project": { + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "year_of_project": { + "type": "integer" + } + } + } +} \ No newline at end of file diff --git a/nesta/core/orms/meetup_es_config.json b/nesta/core/schemas/tier_1/mappings/datasets/meetup_mapping.json similarity index 87% rename from nesta/core/orms/meetup_es_config.json rename to nesta/core/schemas/tier_1/mappings/datasets/meetup_mapping.json index f1d14cb6..49558368 100644 --- a/nesta/core/orms/meetup_es_config.json +++ b/nesta/core/schemas/tier_1/mappings/datasets/meetup_mapping.json @@ -15,7 +15,7 @@ "type": "text" }, "_terms_memberOrigin_group": { - "analyzer": "mesh_terms_analyzer", + "analyzer": "terms_analyzer", "fields": { "keyword": { "type": "keyword" @@ -104,7 +104,7 @@ "type": "keyword" }, "terms_mesh_group": { - "analyzer": "mesh_terms_analyzer", + "analyzer": "terms_analyzer", "fields": { "keyword": { "type": "keyword" @@ -116,7 +116,7 @@ "type": "keyword" }, "terms_topics_group": { - "analyzer": "mesh_terms_analyzer", + "analyzer": "terms_analyzer", "fields": { "keyword": { "type": "keyword" @@ -145,24 +145,5 @@ } } } - }, - "settings": { - "analysis": { - "analyzer": { - "mesh_terms_analyzer": { - "filter": [ - "standard", - "lowercase", - "stop" - ], - "tokenizer": "standard", - "type": "custom" - } - } - }, - "index": { - "number_of_replicas": "1", - "number_of_shards": "5" - } } -} +} \ No newline at end of file diff --git a/nesta/core/orms/nih_es_config.json b/nesta/core/schemas/tier_1/mappings/datasets/nih_mapping.json similarity index 90% rename from nesta/core/orms/nih_es_config.json rename to nesta/core/schemas/tier_1/mappings/datasets/nih_mapping.json index d4901b66..864e8dc4 100644 --- a/nesta/core/orms/nih_es_config.json +++ b/nesta/core/schemas/tier_1/mappings/datasets/nih_mapping.json @@ -108,7 +108,7 @@ "type": "text" }, "terms_mesh_abstract": { - "analyzer": "mesh_terms_analyzer", + "analyzer": "terms_analyzer", "fields": { "keyword": { "type": "keyword" @@ -120,7 +120,7 @@ "type": "keyword" }, "terms_of_funders": { - "analyzer": "mesh_terms_analyzer", + "analyzer": "terms_analyzer", "fields": { "keyword": { "type": "keyword" @@ -179,24 +179,5 @@ } } } - }, - "settings": { - "analysis": { - "analyzer": { - "mesh_terms_analyzer": { - "filter": [ - "standard", - "lowercase", - "stop" - ], - "tokenizer": "standard", - "type": "custom" - } - } - }, - "index": { - "number_of_replicas": "1", - "number_of_shards": "5" - } } -} +} \ No newline at end of file diff --git a/nesta/core/orms/patstat-eu_es_config.json b/nesta/core/schemas/tier_1/mappings/datasets/patstat_mapping.json similarity index 85% rename from 
nesta/core/orms/patstat-eu_es_config.json rename to nesta/core/schemas/tier_1/mappings/datasets/patstat_mapping.json index 43dea5a6..2b93cb51 100644 --- a/nesta/core/orms/patstat-eu_es_config.json +++ b/nesta/core/schemas/tier_1/mappings/datasets/patstat_mapping.json @@ -6,9 +6,6 @@ "booleanFlag_autotranslated_entity": { "type": "boolean" }, - "booleanFlag_eu_patent": { - "type": "boolean" - }, "count_citations_patent": { "type": "integer" }, @@ -109,24 +106,5 @@ } } } - }, - "settings": { - "analysis": { - "analyzer": { - "terms_analyzer": { - "filter": [ - "standard", - "lowercase", - "stop" - ], - "tokenizer": "standard", - "type": "custom" - } - } - }, - "index": { - "number_of_replicas": "1", - "number_of_shards": "5" - } } -} +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/defaults/defaults.json b/nesta/core/schemas/tier_1/mappings/defaults/defaults.json new file mode 100644 index 00000000..6a853756 --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/defaults/defaults.json @@ -0,0 +1,24 @@ +{ + "mappings": { + "_doc": { + "dynamic": "strict" + } + }, + "settings": { + "index": { + "analysis": { + "analyzer": { + "terms_analyzer": { + "filter": [ + "standard", + "lowercase", + "stop" + ], + "tokenizer": "standard", + "type": "custom" + } + } + } + } + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/endpoints/arxlive/arxiv_mapping.json b/nesta/core/schemas/tier_1/mappings/endpoints/arxlive/arxiv_mapping.json new file mode 100644 index 00000000..0b114844 --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/endpoints/arxlive/arxiv_mapping.json @@ -0,0 +1,76 @@ +{ + "mappings": { + "_doc": { + "properties": { + "date_created_article": { + "type": "date" + }, + "json_category_article": { + "properties": { + "ancestors": { + "type": "keyword" + }, + "level": { + "type": "integer" + }, + "order": { + "type": "integer" + }, + "value": { + "type": "keyword" + } + }, + "type": "nested" + }, + "json_fieldsOfStudy_article": { + "properties": { + "ancestors": { + "type": "keyword" + }, + "level": { + "type": "integer" + }, + "order": { + "type": "integer" + }, + "value": { + "type": "keyword" + } + }, + "type": "nested" + }, + "json_location_article": { + "properties": { + "ancestors": { + "type": "keyword" + }, + "level": { + "type": "integer" + }, + "order": { + "type": "integer" + }, + "value": { + "type": "keyword" + } + }, + "type": "nested" + }, + "metric_citations_article": { + "type": "float" + }, + "terms_tokens_article": { + "type": "keyword" + }, + "type_of_entity": { + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + } + } + } + } +} \ No newline at end of file diff --git a/nesta/core/orms/arxiv-eu_es_config.json b/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/arxiv_mapping.json similarity index 56% rename from nesta/core/orms/arxiv-eu_es_config.json rename to nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/arxiv_mapping.json index 6b3cf02a..2823ea20 100644 --- a/nesta/core/orms/arxiv-eu_es_config.json +++ b/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/arxiv_mapping.json @@ -1,40 +1,18 @@ { "mappings": { "_doc": { - "dynamic": "strict", "properties": { "booleanFlag_eu_article": { "type": "boolean" }, - "booleanFlag_multinational_article": { - "type": "boolean" - }, - "count_citations_article": { - "type": "integer" - }, "date_created_article": { "format": "yyyy-MM-dd", "type": "date" }, - "id_digitalObjectIdentifier_article": { - "type": "keyword" - }, 
"json_fieldsOfStudy_article": { "dynamic": true, "properties": {} }, - "metric_novelty_article": { - "type": "float" - }, - "terms_authors_article": { - "analyzer": "terms_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, "terms_category_article": { "analyzer": "terms_analyzer", "fields": { @@ -62,15 +40,6 @@ }, "type": "text" }, - "terms_institutes_article": { - "analyzer": "terms_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, "terms_nuts0_article": { "analyzer": "terms_analyzer", "fields": { @@ -119,56 +88,10 @@ "terms_tokens_entity": { "type": "keyword" }, - "textBody_abstract_article": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "title_of_article": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, "type_of_entity": { "type": "keyword" - }, - "url_of_article": { - "fields": { - "keyword": { - "type": "keyword" - } - }, - "type": "text" - }, - "year_of_article": { - "type": "integer" - } - } - } - }, - "settings": { - "analysis": { - "analyzer": { - "terms_analyzer": { - "filter": [ - "standard", - "lowercase", - "stop" - ], - "tokenizer": "standard", - "type": "custom" } } - }, - "index": { - "number_of_replicas": "1", - "number_of_shards": "5" } } -} +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/companies_mapping.json b/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/companies_mapping.json new file mode 100644 index 00000000..a2496aa3 --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/companies_mapping.json @@ -0,0 +1,14 @@ +{ + "mappings": { + "_doc": { + "properties": { + "booleanFlag_eu_organisation": { + "type": "boolean" + }, + "metric_novelty_organisation": { + "type": "float" + } + } + } + } +} \ No newline at end of file diff --git a/nesta/core/orms/cordis-eu_es_config.json b/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/cordis_mapping.json similarity index 81% rename from nesta/core/orms/cordis-eu_es_config.json rename to nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/cordis_mapping.json index 0e5cae2d..115f354f 100644 --- a/nesta/core/orms/cordis-eu_es_config.json +++ b/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/cordis_mapping.json @@ -65,25 +65,8 @@ "type": "integer" } } - } - }, - "settings": { - "analysis": { - "analyzer": { - "terms_analyzer": { - "filter": [ - "standard", - "lowercase", - "stop" - ], - "tokenizer": "standard", - "type": "custom" - } - } }, - "index": { - "number_of_replicas": "1", - "number_of_shards": "5" - } + "dynamic": null, + "properties": null } -} +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/patstat_mapping.json b/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/patstat_mapping.json new file mode 100644 index 00000000..65a50f06 --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/endpoints/eurito-dev/patstat_mapping.json @@ -0,0 +1,12 @@ +{ + "mappings": { + "_doc": { + "dynamic": "strict", + "properties": { + "booleanFlag_eu_patent": { + "type": "boolean" + } + } + } + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/endpoints/eurito/arxiv_mapping.json b/nesta/core/schemas/tier_1/mappings/endpoints/eurito/arxiv_mapping.json new file mode 100644 index 00000000..2823ea20 --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/endpoints/eurito/arxiv_mapping.json @@ -0,0 +1,97 @@ +{ + 
"mappings": { + "_doc": { + "properties": { + "booleanFlag_eu_article": { + "type": "boolean" + }, + "date_created_article": { + "format": "yyyy-MM-dd", + "type": "date" + }, + "json_fieldsOfStudy_article": { + "dynamic": true, + "properties": {} + }, + "terms_category_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "terms_countries_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "terms_fieldsOfStudy_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "terms_nuts0_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "terms_nuts1_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "terms_nuts2_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "terms_nuts3_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "terms_regions_article": { + "analyzer": "terms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "type": "text" + }, + "terms_tokens_entity": { + "type": "keyword" + }, + "type_of_entity": { + "type": "keyword" + } + } + } + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/endpoints/eurito/companies_mapping.json b/nesta/core/schemas/tier_1/mappings/endpoints/eurito/companies_mapping.json new file mode 100644 index 00000000..a2496aa3 --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/endpoints/eurito/companies_mapping.json @@ -0,0 +1,14 @@ +{ + "mappings": { + "_doc": { + "properties": { + "booleanFlag_eu_organisation": { + "type": "boolean" + }, + "metric_novelty_organisation": { + "type": "float" + } + } + } + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/endpoints/eurito/patstat_mapping.json b/nesta/core/schemas/tier_1/mappings/endpoints/eurito/patstat_mapping.json new file mode 100644 index 00000000..65a50f06 --- /dev/null +++ b/nesta/core/schemas/tier_1/mappings/endpoints/eurito/patstat_mapping.json @@ -0,0 +1,12 @@ +{ + "mappings": { + "_doc": { + "dynamic": "strict", + "properties": { + "booleanFlag_eu_patent": { + "type": "boolean" + } + } + } + } +} \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/aliases/health_scanner.json b/nesta/core/schemas/tier_1/mappings/endpoints/health-scanner/aliases.json similarity index 63% rename from nesta/core/schemas/tier_1/aliases/health_scanner.json rename to nesta/core/schemas/tier_1/mappings/endpoints/health-scanner/aliases.json index d03fce7b..f1431691 100644 --- a/nesta/core/schemas/tier_1/aliases/health_scanner.json +++ b/nesta/core/schemas/tier_1/mappings/endpoints/health-scanner/aliases.json @@ -1,64 +1,64 @@ { "body": { - "crunchbase": "textBody_descriptive_organisation", + "companies": "textBody_descriptive_organisation", "meetup": "textBody_descriptive_group", "nih": "textBody_descriptive_project" }, "city": { - "crunchbase": "placeName_city_organisation", + "companies": "placeName_city_organisation", "meetup": "placeName_city_group", "nih": "placeName_city_organisation" }, "continent": { - "crunchbase": "placeName_continent_organisation", + "companies": "placeName_continent_organisation", "meetup": 
"placeName_continent_group", "nih": "placeName_continent_organisation" }, "continent_id": { - "crunchbase": "id_continent_organisation", + "companies": "id_of_continent", "meetup": "id_continent_group", "nih": "id_of_continent" }, "cost": { - "crunchbase": "cost_of_funding", + "companies": "cost_of_funding", "nih": "cost_total_project" }, "cost_ref": { - "crunchbase": "cost_of_funding", + "companies": "cost_of_funding", "nih": "cost_total_project" }, "countries_ids": { - "crunchbase": "terms_of_countryTags", + "companies": "terms_of_countryTags", "meetup": "terms_of_countryTags", "nih": "terms_of_countryTags" }, "country": { - "crunchbase": "placeName_country_organisation", + "companies": "placeName_country_organisation", "meetup": "placeName_country_group", "nih": "placeName_country_organisation" }, "country_id": { - "crunchbase": "id_iso2_country", + "companies": "id_iso2_country", "meetup": "id_iso2_country", "nih": "id_iso2_country" }, "currency": { - "crunchbase": "currency_of_funding", + "companies": "currency_of_funding", "nih": "currency_total_cost" }, "end": { - "crunchbase": "date_death_organisation", + "companies": "date_death_organisation", "nih": "date_end_project" }, "funders": { - "crunchbase": "terms_of_funders", + "companies": "terms_of_funders", "nih": "terms_of_funders" }, "is_duplicate": { "nih": "booleanFlag_duplicate_abstract" }, "is_health_related": { - "crunchbase": "booleanFlag_health_organisation" + "companies": "booleanFlag_health_organisation" }, "is_translated": { "meetup": "booleanFlag_autotranslated_entity" @@ -67,48 +67,43 @@ "meetup": "terms_iso2lang_entity" }, "location": { - "crunchbase": "coordinate_of_city", + "companies": "coordinate_of_city", "meetup": "coordinate_of_group", "nih": "coordinate_of_organisation" }, "name": { - "crunchbase": "name_of_organisation", + "companies": "name_of_organisation", "meetup": "name_of_group", "nih": "title_of_organisation" }, "novelty": { - "crunchbase": "rank_rhodonite_organisation", + "companies": "rank_rhodonite_organisation", "meetup": "rank_rhodonite_group", "nih": "rank_rhodonite_abstract" }, "region": { - "crunchbase": "placeName_region_organisation" - }, - "sdg_labels": { - "crunchbase": "_terms_sdg_summary", - "meetup": "_terms_sdg_description", - "nih": "terms_sdg_abstract" + "companies": "placeName_region_organisation" }, "start": { - "crunchbase": "date_birth_organisation", + "companies": "date_birth_organisation", "meetup": "date_start_group", "nih": "date_start_project" }, "state": { - "crunchbase": "placeName_state_organisation", + "companies": "placeName_state_organisation", "meetup": "_placeName_state_group", "nih": "placeName_state_organisation" }, "state_id": { - "crunchbase": "id_state_organisation", + "companies": "id_state_organisation", "nih": "id_state_organisation" }, "summary": { - "crunchbase": "textBody_summary_organisation", + "companies": "textBody_summary_organisation", "nih": "textBody_abstract_project" }, "terms": { - "crunchbase": "terms_mesh_description", + "companies": "terms_mesh_description", "meetup": "terms_mesh_group", "nih": "terms_mesh_abstract" }, @@ -116,7 +111,7 @@ "nih": "title_of_project" }, "url": { - "crunchbase": "url_of_organisation", + "companies": "url_of_organisation", "meetup": "url_of_group" } } \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/mappings/endpoints/health-scanner/config.yaml b/nesta/core/schemas/tier_1/mappings/endpoints/health-scanner/config.yaml new file mode 100644 index 00000000..fc7c9834 --- /dev/null +++ 
b/nesta/core/schemas/tier_1/mappings/endpoints/health-scanner/config.yaml @@ -0,0 +1,3 @@ +# if hard-alias is true, actually change the names of the fields +# rather than performing an elasticsearch alias +hard-alias: false diff --git a/nesta/core/schemas/tier_1/field_null_mappings/health_scanner.json b/nesta/core/schemas/tier_1/mappings/endpoints/health-scanner/nulls.json similarity index 100% rename from nesta/core/schemas/tier_1/field_null_mappings/health_scanner.json rename to nesta/core/schemas/tier_1/mappings/endpoints/health-scanner/nulls.json diff --git a/nesta/core/schemas/tier_1/tier_1.json b/nesta/core/schemas/tier_1/ontology.json similarity index 68% rename from nesta/core/schemas/tier_1/tier_1.json rename to nesta/core/schemas/tier_1/ontology.json index 9fc32c95..7f1915b2 100644 --- a/nesta/core/schemas/tier_1/tier_1.json +++ b/nesta/core/schemas/tier_1/ontology.json @@ -9,15 +9,12 @@ "count", "currency", "date", - "datetime", "id", "json", - "language", - "level", "metric", "name", - "personName", "placeName", + "rank", "status", "terms", "textBody", @@ -31,14 +28,11 @@ "term": "middleName", "values": [ "abstract", - "adminRegion", - "ageDepRatioPercWorkAgePop", "alias", - "arxivSubjectCategory", "authCountry", "authors", + "autotranslated", "birth", - "capitalCity", "category", "citations", "city", @@ -47,12 +41,10 @@ "country", "created", "crunchBase", - "datestamp", "death", "description", "descriptive", "digitalObjectIdentifier", - "duns", "duplicate", "ecFunding", "employee", @@ -61,34 +53,19 @@ "eu", "facebook", "family", - "fieldOfStudy", - "fieldOfStudy1", - "fieldOfStudy2", - "fieldOfStudy3", - "fieldOfStudy4", "fieldsOfStudy", "fiscal", - "forked", - "forkedFrom", "framework", - "gini", "health", - "income", - "infMortPer1000LiveBirths", "institutes", "ipc", "iso2", + "iso2lang", "iso3", "isoNumeric", - "journalReference", "last", - "latitude", - "lifeExpAtBirth", "linkedIn", "location", - "login", - "longitude", - "mathSubjectClassification", "member", "memberOrigin", "mesh", @@ -100,37 +77,26 @@ "nuts2", "nuts3", "of", - "official", "parent", - "percPopBelowPovertyLine", - "percPopOver25NoEduc", - "percPopOver25TertiaryEduc", - "percPopRural", - "percPopUrban", "personCountry", "personNuts", - "population", - "programming", "region", "regions", + "rhodonite", "roles", "rounds", - "short", "start", "started", "state", "subcategory", "summary", - "tech", "techFieldNumber", "tokens", "topics", "total", "twitter", - "update", "updated", "usd2018", - "worldRegion", "zipcode" ] }, @@ -139,21 +105,20 @@ "values": [ "abstract", "article", - "author", "category", "city", "continent", + "countryTags", "cost", "country", "description", + "entity", "funders", "funding", "group", - "member", "organisation", "patent", - "project", - "user" + "project" ] } ] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/arxiv.json b/nesta/core/schemas/tier_1/schema_transformations/arxiv.json deleted file mode 100644 index a480fe30..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/arxiv.json +++ /dev/null @@ -1,67 +0,0 @@ -[ - { - "tier_0": "id", - "tier_1": "id_of_article", - "unique": true - }, - { - "tier_0": "created", - "tier_1": "date_created_article" - }, - { - "tier_0": "title", - "tier_1": "title_of_article" - }, - { - "tier_0": "doi", - "tier_1": "id_digitalObjectIdentifier_article" - }, - { - "tier_0": "abstract", - "tier_1": "textBody_abstract_article" - }, - { - "tier_0": "authors", - "tier_1": "terms_authors_article" - }, - 
{ - "tier_0": "citation_count", - "tier_1": "count_citations_article" - }, - { - "tier_0": "normalised_citation", - "tier_1": "metric_citations_article" - }, - { - "tier_0": "fos", - "tier_1": "json_fieldOfStudy_article" - }, - { - "tier_0": "categories", - "tier_1": "json_category_article" - }, - { - "tier_0": "has_multinational", - "tier_1": "booleanFlag_multinational_article" - }, - { - "tier_0": "institutes", - "tier_1": "terms_institutes_article" - }, - { - "tier_0": "tokens", - "tier_1": "terms_tokens_article" - }, - { - "tier_0": "novelty_of_article", - "tier_1": "metric_novelty_article" - }, - { - "tier_0": "countries", - "tier_1": "json_location_article" - }, - { - "tier_0": "year", - "tier_1": "year_of_article" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/crunchbase_organisation.json b/nesta/core/schemas/tier_1/schema_transformations/crunchbase_organisation.json deleted file mode 100644 index 5fbcf86a..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/crunchbase_organisation.json +++ /dev/null @@ -1,11 +0,0 @@ -[ - { - "tier_0": "uuid", - "tier_1": "id_of_organisation", - "unique": true - }, - { - "tier_0": "description", - "tier_1": "textBody_descriptive_organisation" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/crunchbase_organisation_members.json b/nesta/core/schemas/tier_1/schema_transformations/crunchbase_organisation_members.json deleted file mode 100644 index 18ca49da..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/crunchbase_organisation_members.json +++ /dev/null @@ -1,158 +0,0 @@ -[ - { - "tier_0": "company_name", - "tier_1": "name_of_organisation" - }, - { - "tier_0": "roles", - "tier_1": "terms_roles_organisation" - }, - { - "tier_0": "homepage_url", - "tier_1": "url_of_organisation" - }, - { - "tier_0": "country", - "tier_1": "placeName_country_organisation" - }, - { - "tier_0": "country_alpha_2", - "tier_1": "id_iso2_country" - }, - { - "tier_0": "country_alpha_3", - "tier_1": "id_iso3_country" - }, - { - "tier_0": "country_numeric", - "tier_1": "id_isoNumeric_country" - }, - { - "tier_0": "continent", - "tier_1": "id_of_continent" - }, - { - "tier_0": "coordinates", - "tier_1": "coordinate_of_city" - }, - { - "tier_0": "state_code", - "tier_1": "id_state_organisation" - }, - { - "tier_0": "region", - "tier_1": "placeName_region_organisation" - }, - { - "tier_0": "city", - "tier_1": "placeName_city_organisation" - }, - { - "tier_0": "address", - "tier_1": "address_of_organisation" - }, - { - "tier_0": "status", - "tier_1": "status_of_organisation" - }, - { - "tier_0": "short_description", - "tier_1": "textBody_summary_organisation" - }, - { - "tier_0": "long_description", - "tier_1": "textBody_descriptive_organisation" - }, - { - "tier_0": "category_list", - "tier_1": "terms_subcategory_organisation" - }, - { - "tier_0": "category_group_list", - "tier_1": "terms_category_organisation" - }, - { - "tier_0": "funding_rounds", - "tier_1": "count_rounds_funding" - }, - { - "tier_0": "funding_total_usd", - "tier_1": "cost_of_funding" - }, - { - "tier_0": "currency_of_funding", - "tier_1": "currency_of_funding" - }, - { - "tier_0": "_total_cost_usd2018", - "tier_1": "_cost_usd2018_organisation" - }, - { - "tier_0": "founded_on", - "tier_1": "date_birth_organisation" - }, - { - "tier_0": "last_funding_on", - "tier_1": "date_last_funding" - }, - { - "tier_0": "closed_on", - "tier_1": "date_death_organisation" - }, - { - "tier_0": 
"employee_count", - "tier_1": "count_employee_organisation" - }, - { - "tier_0": "facebook_url", - "tier_1": "url_facebook_organisation" - }, - { - "tier_0": "linkedin_url", - "tier_1": "url_linkedIn_organisation" - }, - { - "tier_0": "cb_url", - "tier_1": "url_crunchBase_organisation" - }, - { - "tier_0": "twitter_url", - "tier_1": "url_twitter_organisation" - }, - { - "tier_0": "aliases", - "tier_1": "terms_alias_organisation" - }, - { - "tier_0": "updated_at", - "tier_1": "datetime_updated_organisation" - }, - { - "tier_0": "primary_role", - "tier_1": "type_of_organisation" - }, - { - "tier_0": "parent_id", - "tier_1": "id_parent_organisation" - }, - { - "tier_0": "is_health", - "tier_1": "booleanFlag_health_organisation" - }, - { - "tier_0": "mesh_terms", - "tier_1": "terms_mesh_description" - }, - { - "tier_0": "investor_names", - "tier_1": "terms_of_funders" - }, - { - "tier_0": "placeName_state_organisation", - "tier_1": "placeName_state_organisation" - }, - { - "tier_0": "placeName_continent_organisation", - "tier_1": "placeName_continent_organisation" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/eurito/arxiv-eu.json b/nesta/core/schemas/tier_1/schema_transformations/eurito/arxiv-eu.json deleted file mode 100644 index 03004a5b..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/eurito/arxiv-eu.json +++ /dev/null @@ -1,87 +0,0 @@ -[ - { - "tier_0": "id", - "tier_1": "id_of_article", - "unique": true - }, - { - "tier_0": "created", - "tier_1": "date_created_article" - }, - { - "tier_0": "title", - "tier_1": "title_of_article" - }, - { - "tier_0": "doi", - "tier_1": "id_digitalObjectIdentifier_article" - }, - { - "tier_0": "abstract", - "tier_1": "textBody_abstract_article" - }, - { - "tier_0": "authors", - "tier_1": "terms_authors_article" - }, - { - "tier_0": "citation_count", - "tier_1": "count_citations_article" - }, - { - "tier_0": "fields_of_study", - "tier_1": "json_fieldsOfStudy_article" - }, - { - "tier_0": "_fields_of_study", - "tier_1": "terms_fieldsOfStudy_article" - }, - { - "tier_0": "categories", - "tier_1": "terms_category_article" - }, - { - "tier_0": "has_multinational", - "tier_1": "booleanFlag_multinational_article" - }, - { - "tier_0": "institutes", - "tier_1": "terms_institutes_article" - }, - { - "tier_0": "is_eu", - "tier_1": "booleanFlag_eu_article" - }, - { - "tier_0": "novelty_of_article", - "tier_1": "metric_novelty_article" - }, - { - "tier_0": "nuts_0", - "tier_1": "terms_nuts0_article" - }, - { - "tier_0": "nuts_1", - "tier_1": "terms_nuts1_article" - }, - { - "tier_0": "nuts_2", - "tier_1": "terms_nuts2_article" - }, - { - "tier_0": "nuts_3", - "tier_1": "terms_nuts3_article" - }, - { - "tier_0": "countries", - "tier_1": "terms_countries_article" - }, - { - "tier_0": "regions", - "tier_1": "terms_regions_article" - }, - { - "tier_0": "year", - "tier_1": "year_of_article" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/eurito/cordis-eu.json b/nesta/core/schemas/tier_1/schema_transformations/eurito/cordis-eu.json deleted file mode 100644 index 4c5a1815..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/eurito/cordis-eu.json +++ /dev/null @@ -1,47 +0,0 @@ -[ - { - "tier_0": "rcn", - "tier_1": "id_of_project", - "unique": true - }, - { - "tier_0": "start_date_code", - "tier_1": "date_started_project" - }, - { - "tier_0": "end_date_code", - "tier_1": "date_ended_project" - }, - { - "tier_0": "title", - "tier_1": "title_of_project" - 
}, - { - "tier_0": "description", - "tier_1": "textBody_description_project" - }, - { - "tier_0": "ec_contribution", - "tier_1": "cost_ecFunding_project" - }, - { - "tier_0": "framework", - "tier_1": "name_framework_project" - }, - { - "tier_0": "status", - "tier_1": "status_of_project" - }, - { - "tier_0": "total_cost", - "tier_1": "cost_total_project" - }, - { - "tier_0": "year", - "tier_1": "year_of_project" - }, - { - "tier_0": "link", - "tier_1": "url_of_project" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/eurito/crunchbase-eu.json b/nesta/core/schemas/tier_1/schema_transformations/eurito/crunchbase-eu.json deleted file mode 100644 index 1f3146a5..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/eurito/crunchbase-eu.json +++ /dev/null @@ -1,162 +0,0 @@ -[ - { - "tier_0": "company_name", - "tier_1": "name_of_organisation" - }, - { - "tier_0": "is_eu", - "tier_1": "booleanFlag_eu_organisation" - }, - { - "tier_0": "roles", - "tier_1": "terms_roles_organisation" - }, - { - "tier_0": "homepage_url", - "tier_1": "url_of_organisation" - }, - { - "tier_0": "country", - "tier_1": "placeName_country_organisation" - }, - { - "tier_0": "country_alpha_2", - "tier_1": "id_iso2_country" - }, - { - "tier_0": "country_alpha_3", - "tier_1": "id_iso3_country" - }, - { - "tier_0": "country_numeric", - "tier_1": "id_isoNumeric_country" - }, - { - "tier_0": "continent", - "tier_1": "id_of_continent" - }, - { - "tier_0": "coordinates", - "tier_1": "coordinate_of_city" - }, - { - "tier_0": "state_code", - "tier_1": "id_state_organisation" - }, - { - "tier_0": "region", - "tier_1": "placeName_region_organisation" - }, - { - "tier_0": "city", - "tier_1": "placeName_city_organisation" - }, - { - "tier_0": "address", - "tier_1": "address_of_organisation" - }, - { - "tier_0": "status", - "tier_1": "status_of_organisation" - }, - { - "tier_0": "short_description", - "tier_1": "textBody_summary_organisation" - }, - { - "tier_0": "long_description", - "tier_1": "textBody_descriptive_organisation" - }, - { - "tier_0": "category_list", - "tier_1": "terms_subcategory_organisation" - }, - { - "tier_0": "category_group_list", - "tier_1": "terms_category_organisation" - }, - { - "tier_0": "funding_rounds", - "tier_1": "count_rounds_funding" - }, - { - "tier_0": "funding_total_usd", - "tier_1": "cost_of_funding" - }, - { - "tier_0": "currency_of_funding", - "tier_1": "currency_of_funding" - }, - { - "tier_0": "_total_cost_usd2018", - "tier_1": "_cost_usd2018_organisation" - }, - { - "tier_0": "founded_on", - "tier_1": "date_birth_organisation" - }, - { - "tier_0": "last_funding_on", - "tier_1": "date_last_funding" - }, - { - "tier_0": "closed_on", - "tier_1": "date_death_organisation" - }, - { - "tier_0": "employee_count", - "tier_1": "count_employee_organisation" - }, - { - "tier_0": "facebook_url", - "tier_1": "url_facebook_organisation" - }, - { - "tier_0": "linkedin_url", - "tier_1": "url_linkedIn_organisation" - }, - { - "tier_0": "cb_url", - "tier_1": "url_crunchBase_organisation" - }, - { - "tier_0": "twitter_url", - "tier_1": "url_twitter_organisation" - }, - { - "tier_0": "aliases", - "tier_1": "terms_alias_organisation" - }, - { - "tier_0": "updated_at", - "tier_1": "date_updated_organisation" - }, - { - "tier_0": "primary_role", - "tier_1": "type_of_organisation" - }, - { - "tier_0": "parent_id", - "tier_1": "id_parent_organisation" - }, - { - "tier_0": "is_health", - "tier_1": "booleanFlag_health_organisation" - }, - { - "tier_0": 
"mesh_terms", - "tier_1": "terms_mesh_description" - }, - { - "tier_0": "investor_names", - "tier_1": "terms_of_funders" - }, - { - "tier_0": "placeName_state_organisation", - "tier_1": "placeName_state_organisation" - }, - { - "tier_0": "placeName_continent_organisation", - "tier_1": "placeName_continent_organisation" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/eurito/patstat-eu.json b/nesta/core/schemas/tier_1/schema_transformations/eurito/patstat-eu.json deleted file mode 100644 index d2e1b4f4..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/eurito/patstat-eu.json +++ /dev/null @@ -1,55 +0,0 @@ -[ - { - "tier_0": "id", - "tier_1": "id_family_patent", - "unique": true - }, - { - "tier_0": "is_eu", - "tier_1": "booleanFlag_eu_patent" - }, - { - "tier_0": "earliest_filing_date", - "tier_1": "date_of_patent" - }, - { - "tier_0": "earliest_filing_year", - "tier_1": "year_of_patent" - }, - { - "tier_0": "title", - "tier_1": "title_of_patent" - }, - { - "tier_0": "abstract", - "tier_1": "textBody_abstract_patent" - }, - { - "tier_0": "nb_citing_docdb_fam", - "tier_1": "count_citations_patent" - }, - { - "tier_0": "ipc", - "tier_1": "terms_ipc_patent" - }, - { - "tier_0": "nace2", - "tier_1": "terms_nace2_patent" - }, - { - "tier_0": "tech", - "tier_1": "terms_techFieldNumber_patent" - }, - { - "tier_0": "ctry", - "tier_1": "terms_personCountry_patent" - }, - { - "tier_0": "nuts", - "tier_1": "terms_personNuts_patent" - }, - { - "tier_0": "appln_auth", - "tier_1": "terms_authCountry_patent" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/github.json b/nesta/core/schemas/tier_1/schema_transformations/github.json deleted file mode 100644 index 3caf1516..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/github.json +++ /dev/null @@ -1,72 +0,0 @@ -[ - { - "tier_0": "project_name", - "tier_1": "name_of_project" - }, - { - "tier_0": "project_description", - "tier_1": "textBody_descriptive_project" - }, - { - "tier_0": "project_url", - "tier_1": "url_of_project" - }, - { - "tier_0": "project_language", - "tier_1": "language_programming_project" - }, - { - "tier_0": "project_creation_date", - "tier_1": "datetime_created_project" - }, - { - "tier_0": "project_forked_from", - "tier_1": "id_forkedFrom_project" - }, - { - "tier_0": "project_id", - "tier_1": "id_of_project", - "unique": true - }, - { - "tier_0": "user_login", - "tier_1": "name_login_user" - }, - { - "tier_0": "user_company", - "tier_1": "name_of_organisation" - }, - { - "tier_0": "user_creation_date", - "tier_1": "datetime_created_user" - }, - { - "tier_0": "user_type", - "tier_1": "type_of_user" - }, - { - "tier_0": "user_longitude", - "tier_1": "coordinate_longitude_user" - }, - { - "tier_0": "user_latitude", - "tier_1": "coordinate_latitude_user" - }, - { - "tier_0": "user_state", - "tier_1": "placeName_state_user" - }, - { - "tier_0": "user_city", - "tier_1": "placeName_city_user" - }, - { - "tier_0": "user_id", - "tier_1": "id_of_user", - "unique": true - }, - { - "tier_0": "user_country_code", - "tier_1": "id_iso2_country" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/meetup.json b/nesta/core/schemas/tier_1/schema_transformations/meetup.json deleted file mode 100644 index c7145b25..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/meetup.json +++ /dev/null @@ -1,79 +0,0 @@ -[ - { - "tier_0": "id", - "tier_1": "id_of_group", - "unique": true 
- }, - { - "tier_0": "name", - "tier_1": "name_of_group" - }, - { - "tier_0": "urlname", - "tier_1": "url_of_group" - }, - { - "tier_0": "category_name", - "tier_1": "name_of_category" - }, - { - "tier_0": "country", - "tier_1": "id_iso2_country" - }, - { - "tier_0": "iso3", - "tier_1": "id_iso3_country" - }, - { - "tier_0": "isoNumeric", - "tier_1": "id_isoNumeric_country" - }, - { - "tier_0": "country_name", - "tier_1": "placeName_country_group" - }, - { - "tier_0": "continent", - "tier_1": "placeName_continent_group" - }, - { - "tier_0": "city", - "tier_1": "placeName_city_group" - }, - { - "tier_0": "created", - "tier_1": "date_start_group" - }, - { - "tier_0": "description", - "tier_1": "textBody_descriptive_group" - }, - { - "tier_0": "coordinate", - "tier_1": "coordinate_of_group" - }, - { - "tier_0": "members", - "tier_1": "count_member_group" - }, - { - "tier_0": "topics", - "tier_1": "terms_topics_group" - }, - { - "tier_0": "mesh_terms", - "tier_1": "terms_mesh_group" - }, - { - "tier_0": "member_origins", - "tier_1": "terms_memberOrigin_group" - }, - { - "tier_0": "continent_id", - "tier_1": "id_continent_group" - }, - { - "tier_0": "country_id", - "tier_1": "id_country_group" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/meetup_members.json b/nesta/core/schemas/tier_1/schema_transformations/meetup_members.json deleted file mode 100644 index cd4fb5a9..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/meetup_members.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "tier_0": "member_id", - "tier_1": "id_of_member", - "unique": true - }, - { - "tier_0": "group_id", - "tier_1": "id_of_group", - "unique": true - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/nih.json b/nesta/core/schemas/tier_1/schema_transformations/nih.json deleted file mode 100644 index f563dc7b..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/nih.json +++ /dev/null @@ -1,103 +0,0 @@ -[ - { - "tier_0": "full_project_num", - "tier_1": "id_of_project", - "unique": true - }, - { - "tier_0": "fy", - "tier_1": "year_fiscal_funding" - }, - { - "tier_0": "city", - "tier_1": "placeName_city_organisation" - }, - { - "tier_0": "country", - "tier_1": "placeName_country_organisation" - }, - { - "tier_0": "org_state", - "tier_1": "id_state_organisation" - }, - { - "tier_0": "org_zipcode", - "tier_1": "placeName_zipcode_organisation" - }, - { - "tier_0": "org_name", - "tier_1": "title_of_organisation" - }, - { - "tier_0": "phr", - "tier_1": "textBody_descriptive_project" - }, - { - "tier_0": "project_start", - "tier_1": "date_start_project" - }, - { - "tier_0": "project_end", - "tier_1": "date_end_project" - }, - { - "tier_0": "project_terms", - "tier_1": "terms_descriptive_project" - }, - { - "tier_0": "project_title", - "tier_1": "title_of_project" - }, - { - "tier_0": "total_cost", - "tier_1": "cost_total_project" - }, - { - "tier_0": "abstract_text", - "tier_1": "textBody_abstract_project" - }, - { - "tier_0": "coordinates", - "tier_1": "coordinate_of_organisation" - }, - { - "tier_0": "country_alpha_2", - "tier_1": "id_iso2_country" - }, - { - "tier_0": "country_alpha_3", - "tier_1": "id_iso3_country" - }, - { - "tier_0": "country_numeric", - "tier_1": "id_isoNumeric_country" - }, - { - "tier_0": "continent", - "tier_1": "id_of_continent" - }, - { - "tier_0": "total_cost_currency", - "tier_1": "currency_total_cost" - }, - { - "tier_0": "_total_cost_usd2018", - "tier_1": "_cost_usd2018_project" - }, 
- { - "tier_0": "mesh_terms", - "tier_1": "terms_mesh_abstract" - }, - { - "tier_0": "duplicate_abstract", - "tier_1": "booleanFlag_duplicate_abstract" - }, - { - "tier_0": "placeName_state_organisation", - "tier_1": "placeName_state_organisation" - }, - { - "tier_0": "placeName_continent_organisation", - "tier_1": "placeName_continent_organisation" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/schema_transformations/worldbank.json b/nesta/core/schemas/tier_1/schema_transformations/worldbank.json deleted file mode 100644 index e208ca3d..00000000 --- a/nesta/core/schemas/tier_1/schema_transformations/worldbank.json +++ /dev/null @@ -1,83 +0,0 @@ -[ - { - "tier_0": "id", - "tier_1": "id_iso3_country", - "unique": true - }, - { - "tier_0": "capitalCity", - "tier_1": "placeName_capitalCity_country" - }, - { - "tier_0": "incomeLevel", - "tier_1": "level_income_country" - }, - { - "tier_0": "iso2Code", - "tier_1": "id_iso2_country" - }, - { - "tier_0": "latitude", - "tier_1": "coordinate_latitude_country" - }, - { - "tier_0": "longitude", - "tier_1": "coordinate_longitude_country" - }, - { - "tier_0": "year", - "tier_1": "year_datestamp_country" - }, - { - "tier_0": "name", - "tier_1": "placeName_of_country" - }, - { - "tier_0": "region", - "tier_1": "placeName_worldRegion_country" - }, - { - "tier_0": "adminregion", - "tier_1": "placeName_adminRegion_country" - }, - { - "tier_0": "gini_index", - "tier_1": "metric_gini_country" - }, - { - "tier_0": "life_expectancy_at_birth_total_years", - "tier_1": "metric_lifeExpAtBirth_country" - }, - { - "tier_0": "population_total", - "tier_1": "count_population_country" - }, - { - "tier_0": "age_dependency_ratio_pc_of_working_age_population", - "tier_1": "metric_ageDepRatioPercWorkAgePop_country" - }, - { - "tier_0": "barro_lee_percentage_of_population_age_25_with_no_education", - "tier_1": "metric_percPopOver25NoEduc_country" - }, - { - "tier_0": "barro_lee_perce_of_popul_age_25_with_tertia_school_comple_tertia", - "tier_1": "metric_percPopOver25TertiaryEduc_country" - }, - { - "tier_0": "poverty_headcount_ratio_at_national_poverty_line_pc_of_populatio", - "tier_1": "metric_percPopBelowPovertyLine_country" - }, - { - "tier_0": "rural_population_pc_of_total_population", - "tier_1": "metric_percPopRural_country" - }, - { - "tier_0": "mortality_rate_infant_per_1_000_live_births", - "tier_1": "metric_infMortPer1000LiveBirths_country" - }, - { - "tier_0": "urban_population_pc_of_total", - "tier_1": "metric_percPopUrban_country" - } -] \ No newline at end of file diff --git a/nesta/core/schemas/tier_1/tests/test_aliases.py b/nesta/core/schemas/tier_1/tests/test_aliases.py new file mode 100644 index 00000000..c8b0e906 --- /dev/null +++ b/nesta/core/schemas/tier_1/tests/test_aliases.py @@ -0,0 +1,35 @@ +import os +import glob +import json +from pathlib import Path +import pytest + +@pytest.fixture +def json_files(): + cwd = os.path.dirname(__file__) + return list(glob.glob(f'{cwd}/../**/*json', recursive=True)) + + +def test_mappings_build(json_files): + + # Test each dataset for valid ontology + dirname = Path(os.path.dirname(__file__)).parent + dataset_dirname = os.path.join(dirname, 'datasets') + ontology = {} + for dataset in os.listdir(dataset_dirname): + filename = os.path.join(dataset_dirname, dataset) + with open(filename) as f: + _ontology = json.load(f) + ontology[dataset.split('.json')[0]] = list(_ontology['tier0_to_tier1'].values()) + + # Test that each alias is valid + for filename in json_files: + _, _filename = 
os.path.split(filename)
+        if _filename != 'aliases.json':
+            continue
+        with open(filename) as f:
+            aliases = json.load(f)
+        for new_name, info in aliases.items():
+            for dataset, old_name in info.items():
+                assert dataset in ontology, f'No such dataset "{dataset}" in {list(ontology.keys())}, referenced by {filename}'
+                assert old_name in ontology[dataset], f'{old_name} not found in {dataset}, referenced by {filename}'
diff --git a/nesta/core/schemas/tier_1/tests/test_format.py b/nesta/core/schemas/tier_1/tests/test_format.py
new file mode 100644
index 00000000..bb157b9c
--- /dev/null
+++ b/nesta/core/schemas/tier_1/tests/test_format.py
@@ -0,0 +1,44 @@
+from nesta.core.orms.orm_utils import get_es_mapping
+import os
+import glob
+import json
+import pytest
+
+@pytest.fixture
+def json_files():
+    cwd = os.path.dirname(__file__)
+    return glob.glob(f'{cwd}/../**/*json', recursive=True)
+
+
+def test_is_tidy(json_files):
+    """Check that all files are valid, tidy json"""
+    for filename in json_files:
+        # ontology.json is tested elsewhere
+        _, _filename = os.path.split(filename)
+        if _filename == 'ontology.json':
+            continue
+        with open(filename) as f:
+            raw = f.read()
+        js = json.loads(raw)
+        assert raw == json.dumps(js, sort_keys=True, indent=4), (f'\n\n{_filename} has not been tidied.\nBe sure to '
+                                                                 'run "python .githooks/hooktools/sort_all_json.py" '
+                                                                 'from the root directory to '
+                                                                 'avoid this test failure.\n\n')
+
+def test_mappings_build(json_files):
+    endpoints, datasets = set(), set()
+    for filename in json_files:
+        if not filename.endswith('mapping.json'):
+            continue
+        if 'datasets' in filename:
+            _, _filename = os.path.split(filename)
+            dataset = _filename.split('_mapping.json')[0]
+            datasets.add(dataset)
+        if 'endpoints' in filename:
+            dirname, _ = os.path.split(filename)
+            _, endpoint = os.path.split(dirname)
+            endpoints.add(endpoint)
+    for endpoint in endpoints:
+        for dataset in datasets:
+            get_es_mapping(dataset, endpoint)
+            get_es_mapping(dataset, 'dummy')  # <--- also test on non-existent endpoint
diff --git a/nesta/core/schemas/tier_1/tests/test_ontology.py b/nesta/core/schemas/tier_1/tests/test_ontology.py
new file mode 100644
index 00000000..e491c50e
--- /dev/null
+++ b/nesta/core/schemas/tier_1/tests/test_ontology.py
@@ -0,0 +1,48 @@
+import os
+from pathlib import Path
+import pytest
+import json
+
+@pytest.fixture
+def ontology():
+    dirname = Path(os.path.dirname(__file__)).parent
+    # Load the ontology
+    filename = os.path.join(dirname, 'ontology.json')
+    with open(filename) as f:
+        ontology = json.load(f)
+    return {row['term']: row['values'] for row in ontology}
+
+
+def test_ontology_uniqueness(ontology):
+    for lvl, values in ontology.items():
+        assert len(values) == len(set(values)), f'{lvl} has duplicate values'
+
+
+def test_validate(ontology):
+    dirname = Path(os.path.dirname(__file__)).parent
+    dataset_dirname = os.path.join(dirname, 'datasets')
+    firsts, middles, lasts = [], [], []
+    # Test each dataset for valid ontology
+    for filename in os.listdir(dataset_dirname):
+        filename = os.path.join(dataset_dirname, filename)
+        with open(filename) as f:
+            dataset = json.load(f)
+        for field_name in dataset['tier0_to_tier1'].values():
+            if field_name.startswith('_'):
+                field_name = field_name[1:]
+            first, middle, last = field_name.split('_')
+            # Test the vocab is valid
+            assert first in ontology['firstName'], f'{dataset} has unexpected field {field_name}'
+            assert middle in ontology['middleName'], f'{dataset} has unexpected field {field_name}'
+            assert last in
ontology['lastName'], f'{dataset} has unexpected field {field_name}' + # Save these for the tests at the end + firsts.append(first) + middles.append(middle) + lasts.append(last) + # Test there is no superfluous vocab in the ontology + for f in ontology['firstName']: + assert f in firsts, f'Unused first name: {f}' + for f in ontology['middleName']: + assert f in middles, f'Unused middle name: {f}' + for f in ontology['lastName']: + assert f in lasts, f'Unused last name: {f}' diff --git a/nesta/core/schemas/tier_1/tests/test_validate.py b/nesta/core/schemas/tier_1/tests/test_validate.py deleted file mode 100644 index 4c70be6b..00000000 --- a/nesta/core/schemas/tier_1/tests/test_validate.py +++ /dev/null @@ -1,87 +0,0 @@ -import os -import glob -import json -from collections import Counter -from nesta.core.luigihacks.misctools import find_filepath_from_pathstub - -ES_CONF_SUFFIX = "_es_config.json" - -def alias_info(filepath): - with open(filepath) as f: - data = json.load(f) - for alias, info in data.items(): - for dataset, field in info.items(): - yield (alias, dataset, field) - - -class TestValidate(): - def test_validate(self): - # Load the ontology - cwd = os.path.dirname(__file__) - filename = os.path.join(cwd, '../tier_1.json') - with open(filename) as f: - data = json.load(f) - ontology = {row["term"]: row["values"] for row in data} - # Assert the core structure of the ontology - assert len(ontology) == 3 - for term_type in ["firstName", "middleName", "lastName"]: - assert term_type in ontology - - # Iterate over schema transformations - all_fields = {} - for filename in glob.glob(f'{cwd}/../**/*json', - recursive=True): - # Load the transformation - if 'schema_transformations' not in filename: - continue - print(filename) - with open(filename) as f: - data = json.load(f) - # Assert that the terms are in the ontology - tier_0, tier_1 = [], [] - for row in data: - fieldname = row['tier_1'] - tier_0.append(row['tier_0']) - tier_1.append(fieldname) - if fieldname.startswith("_"): - fieldname = fieldname[1:] - first, middle, last = fieldname.split("_") - assert first in ontology["firstName"] - assert middle in ontology["middleName"] - assert last in ontology["lastName"] - # Record the dataset name for the next tests - dataset_name = filename.replace(".json", "").split("/")[-1] - all_fields[dataset_name] = tier_1 - # Assert no duplicates - _, count = Counter(tier_0).most_common(1)[0] - print(Counter(tier_0).most_common(1)[0]) - assert count == 1 - _, count = Counter(tier_1).most_common(1)[0] - print(Counter(tier_1).most_common(1)[0]) - assert count == 1 - - def test_aliases(self): - """Assert consistency between the aliases and schemas""" - top_dir = find_filepath_from_pathstub("core/orms") - all_fields = {} - for filename in os.listdir(top_dir): - if not filename.endswith(ES_CONF_SUFFIX): - continue - dataset = filename.replace(ES_CONF_SUFFIX, "") - filename = os.path.join(top_dir, filename) - with open(filename) as f: - data = json.load(f) - print(f'Found {filename}') - fields = data["mappings"]["_doc"]["properties"].keys() - all_fields[dataset] = fields - - cwd = os.path.dirname(__file__) - path = os.path.join(cwd, '../aliases/') - for filename in os.listdir(path): - if not filename.endswith(".json"): - continue - filename = os.path.join(path, filename) - for alias, dataset, field in alias_info(filename): - print("\t", alias, dataset, field) - assert dataset in all_fields.keys() - assert field in all_fields[dataset] diff --git a/nesta/core/schemas/tier_1/tidy_schema.py 
b/nesta/core/schemas/tier_1/tests/tidy_schema.py similarity index 91% rename from nesta/core/schemas/tier_1/tidy_schema.py rename to nesta/core/schemas/tier_1/tests/tidy_schema.py index c6cf4f15..22ad96db 100644 --- a/nesta/core/schemas/tier_1/tidy_schema.py +++ b/nesta/core/schemas/tier_1/tests/tidy_schema.py @@ -7,7 +7,7 @@ """ import json -FILENAME="tier_1.json" +FILENAME="../ontology.json" # Load with open(FILENAME) as f: diff --git a/nesta/packages/biorxiv/collect_biorxiv.py b/nesta/packages/biorxiv/collect_biorxiv.py deleted file mode 100644 index 2699d68d..00000000 --- a/nesta/packages/biorxiv/collect_biorxiv.py +++ /dev/null @@ -1,35 +0,0 @@ -from nesta.packages.mag.query_mag_api import get_journal_articles -from nesta.packages.mag.parse_abstract import uninvert_abstract - -""" -Schema transformation of arviv ORM from MAG raw data, -so that biorxiv data from MAG can slot into arxiv pipelines. -""" -ARXIV_MAG = {'id':'DOI', - 'datestamp': 'D', - 'created': 'D', - 'updated': 'D', - 'title': 'DN', - 'doi':'DOI', - 'abstract': 'IA', - 'authors' : 'AA', - 'citation_count': 'CC'} - - -def get_biorxiv_articles(api_key, start_date='1 Jan, 2000'): - """Get all biorxiv articles from the MAG API. - - Args: - api_key (str): MAG API key - start_date (str): Sensibly formatted date string (interpretted by pd) - Yields: - article (dict): article object ready for insertion via nesta's arxiv ORM - """ - for article in get_journal_articles('biorxiv', start_date=start_date, - api_key=api_key): - # Convert to arxiv format for insertion to database - article= {arxiv_field: article[mag_field] - for arxiv_field, mag_field in ARXIV_MAG.items()} - article['abstract'] = uninvert_abstract(article['abstract']) - article['id'] = f"biorxiv-{article['id']}" # just to be sure - yield article diff --git a/nesta/packages/biorxiv/test_collect_biorxiv.py b/nesta/packages/biorxiv/test_collect_biorxiv.py deleted file mode 100644 index e8d0da68..00000000 --- a/nesta/packages/biorxiv/test_collect_biorxiv.py +++ /dev/null @@ -1,25 +0,0 @@ -from nesta.packages.biorxiv.collect_biorxiv import get_biorxiv_articles -from nesta.packages.biorxiv.collect_biorxiv import ARXIV_MAG -from nesta.core.orms.arxiv_orm import Article -from unittest import mock -import pytest - -@pytest.fixture -def dummy_article(): - return {key: f'blah blah{key}' for key in set(ARXIV_MAG.values())} - -def test_all_fields_in_orm(): - orm = dir(Article) - assert all(field in orm for field in ARXIV_MAG.keys()) - -@mock.patch('nesta.packages.biorxiv.collect_biorxiv.get_journal_articles') -@mock.patch('nesta.packages.biorxiv.collect_biorxiv.uninvert_abstract') -def test_get_biorxiv_articles(_, mocked, dummy_article): - n_articles = 3 - mocked.return_value = iter([dummy_article]*n_articles) - for i, article in enumerate(get_biorxiv_articles(api_key='dummy_api_key', - start_date='dummy_date')): - assert type(article) is dict - assert len(article) == len(ARXIV_MAG) - assert i + 1 == n_articles - diff --git a/nesta/packages/decorators/schema_transform.py b/nesta/packages/decorators/schema_transform.py index 45c067c5..10b82035 100644 --- a/nesta/packages/decorators/schema_transform.py +++ b/nesta/packages/decorators/schema_transform.py @@ -6,40 +6,34 @@ such that specified field names are transformed and unspecified fields are dropped. A valid file would be formatted as shown: -[{"tier_0": "bad_col", "tier_1": "good_col"}, -{"tier_0": "another_bad_col", "tier_1": "another_good_col"}, -...] 
-
-where :code:`tier_0` and :code:`tier_1` correspond to :code:`from_key` and :code:`to_key`
-in the below documentation.
+{ "tier0_to_tier1":
+    { "bad_col": "good_col",
+      "another_bad_col": "another_good_col"
+    }
+}
 '''
 
 import pandas
 import json
 
-def load_transformer(filename, from_key, to_key):
+def load_transformer(filename):
     with open(filename) as f:
         _data = json.load(f)
-    transformer = {row[from_key]:row[to_key] for row in _data}
+    transformer = _data['tier0_to_tier1']
     return transformer
 
-def schema_transform(filename, from_key, to_key):
+def schema_transform(filename):
     '''
     Args:
-        filename (str): A record-oriented JSON file path mapping field names
-                        denoted by from :code:`from_key` and :code:`to_key`.
-        from_key (str): The key in file indicated by :code:`filename` which indicates
-                        the field name to transform.
-        to_key (str): The key in file indicated by :code:`filename` which what
-                      the field name indicated by :code:`from_key` will be transformed to.
+        filename (str): Path to a JSON file containing the
+                        :code:`tier0_to_tier1` field-name mapping shown above.
     Returns:
         Data in the format it was originally passed to the wrapper in, with
         specified field names transformed and unspecified fields dropped.
     '''
-    transformer = load_transformer(filename, from_key, to_key)
+    transformer = load_transformer(filename)
     def wrapper(func):
         def transformed(*args, **kwargs):
             data = func(*args,**kwargs)
@@ -64,20 +58,18 @@ def transformed(*args, **kwargs):
     return wrapper
 
-def schema_transformer(data, *, filename, from_key, to_key, ignore=[]):
+def schema_transformer(data, *, filename, ignore=[]):
     '''Function version of the schema_transformer wrapper.
 
     Args:
         data (dataframe OR list of dicts): the data requiring the schema transformation
         filename (str): the path to the schema json file
-        from_key (str): tier level of the data
-        to_key (str): tier level to be applied to the data
         ignore (list): optional list of fields, e.g. ids or keys which shouldn't be dropped
 
     Returns:
         supplied data with schema applied
     '''
     # Accept DataFrames...
- transformer = load_transformer(filename, from_key, to_key) + transformer = load_transformer(filename) if type(data) == pandas.DataFrame: drop_cols = [c for c in data.columns if c not in transformer diff --git a/nesta/packages/decorators/tests/test_schema_transform.py b/nesta/packages/decorators/tests/test_schema_transform.py index aa3c66c0..5994cc35 100644 --- a/nesta/packages/decorators/tests/test_schema_transform.py +++ b/nesta/packages/decorators/tests/test_schema_transform.py @@ -22,7 +22,7 @@ def test_transformer(): def test_dataframe_transform(self, mocked_loader, test_transformer, test_data): mocked_loader.return_value = test_transformer dummy_func = lambda : pd.DataFrame(test_data) - wrapper = schema_transform("dummy", "dummy", "dummy") + wrapper = schema_transform("dummy") wrapped = wrapper(dummy_func) transformed = wrapped() @@ -34,7 +34,7 @@ def test_dataframe_transform(self, mocked_loader, test_transformer, test_data): def test_list_of_dict_transform(self, mocked_loader, test_transformer, test_data): mocked_loader.return_value = test_transformer dummy_func = lambda : test_data - wrapper = schema_transform("dummy", "dummy", "dummy") + wrapper = schema_transform("dummy") wrapped = wrapper(dummy_func) transformed = wrapped() transformed = pd.DataFrame(transformed) @@ -46,7 +46,7 @@ def test_list_of_dict_transform(self, mocked_loader, test_transformer, test_data def test_invalid_type_transform(self, mocked_loader, test_transformer): mocked_loader.return_value = test_transformer dummy_func = lambda : None - wrapper = schema_transform("dummy", "dummy", "dummy") + wrapper = schema_transform("dummy") wrapped = wrapper(dummy_func) with pytest.raises(ValueError) as e: wrapped() @@ -57,6 +57,5 @@ def test_single_dict(self, mocked_loader, test_transformer): mocked_loader.return_value = test_transformer test_data = {'bad_col': 111, 'another_bad_col': 222, 'stuff': 333} - transformed = schema_transformer(test_data, filename='dummy', - from_key='dummy', to_key='dummy') + transformed = schema_transformer(test_data, filename='dummy') assert transformed == {'good_col': 111, 'another_good_col': 222} diff --git a/nesta/packages/geo_utils/country_iso_code.py b/nesta/packages/geo_utils/country_iso_code.py index 9a9ec591..9817594b 100644 --- a/nesta/packages/geo_utils/country_iso_code.py +++ b/nesta/packages/geo_utils/country_iso_code.py @@ -49,19 +49,20 @@ def country_iso_code_dataframe(df, country='country'): df['continent'] = None continents = alpha2_to_continent_mapping() - + country_codes = None for idx, row in df.iterrows(): try: country_codes = country_iso_code(row[country]) except KeyError: # some fallback method could go here - pass + continue else: - df.at[idx, 'country_alpha_2'] = country_codes.alpha_2 - df.at[idx, 'country_alpha_3'] = country_codes.alpha_3 - df.at[idx, 'country_numeric'] = country_codes.numeric - df.at[idx, 'continent'] = continents.get(country_codes.alpha_2) - + if country_codes is None: + continue + df.at[idx, 'country_alpha_2'] = country_codes.alpha_2 + df.at[idx, 'country_alpha_3'] = country_codes.alpha_3 + df.at[idx, 'country_numeric'] = country_codes.numeric + df.at[idx, 'continent'] = continents.get(country_codes.alpha_2) return df diff --git a/requirements.txt b/requirements.txt index 06a159bd..45e64d20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,7 @@ PyMySQL==0.9.3 pyshp==2.1.0 pytest==4.5.0 PyVirtualDisplay==0.2.3 +pyyaml==5.3.1 requests==2.22.0 requests_aws4auth==0.9 retrying==1.3.3
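For reference, the transformation files consumed by schema_transform.py now use a single
:code:`tier0_to_tier1` object in place of the old record-oriented list, and the
:code:`from_key`/:code:`to_key` arguments are gone from the public helpers. A minimal
sketch of the new call pattern, mirroring test_schema_transform.py (the file name
"example.json" and the column names are illustrative only)::

    import json
    from nesta.packages.decorators.schema_transform import schema_transformer

    # Write a mapping file in the new single-object format
    with open('example.json', 'w') as f:
        json.dump({'tier0_to_tier1': {'bad_col': 'good_col',
                                      'another_bad_col': 'another_good_col'}}, f)

    # Unmapped fields ('stuff') are dropped unless listed in `ignore`
    row = {'bad_col': 111, 'another_bad_col': 222, 'stuff': 333}
    out = schema_transformer(row, filename='example.json')
    assert out == {'good_col': 111, 'another_good_col': 222}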
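test_ontology.py enforces that every tier-1 value in datasets/*.json decomposes as
firstName_middleName_lastName, with each part drawn from the controlled vocabulary in
ontology.json (a single leading underscore, which appears to mark derived fields such as
_cost_usd2018_project, is stripped before validation). A sketch of that rule, using a
made-up ontology fragment rather than the real vocabulary::

    # Illustrative fragment in the {'term': ..., 'values': [...]} shape of ontology.json
    ontology_rows = [{'term': 'firstName', 'values': ['textBody', 'title']},
                     {'term': 'middleName', 'values': ['abstract', 'of']},
                     {'term': 'lastName', 'values': ['article', 'project']}]
    ontology = {row['term']: row['values'] for row in ontology_rows}

    def is_valid(field_name):
        if field_name.startswith('_'):  # one leading underscore is permitted
            field_name = field_name[1:]
        first, middle, last = field_name.split('_')  # exactly three components
        return (first in ontology['firstName'] and
                middle in ontology['middleName'] and
                last in ontology['lastName'])

    assert is_valid('textBody_abstract_article')
    assert is_valid('_title_of_project')
    assert not is_valid('textBody_summary_article')  # 'summary' is not a middleName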
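Similarly, each endpoint's aliases.json maps a public alias onto the tier-1 field it
refers to in each dataset, and test_aliases.py asserts that every referenced dataset and
field actually exist in the tier0_to_tier1 mappings. The expected shape, with
illustrative field names::

    # aliases.json: {alias: {dataset: tier-1 field name}}
    aliases = {'title': {'arxiv': 'title_of_article',
                         'cordis': 'title_of_project'}}

    # As in test_aliases.py, 'ontology' here means {dataset: [tier-1 field names]}
    # collected from datasets/*.json
    ontology = {'arxiv': ['title_of_article', 'textBody_abstract_article'],
                'cordis': ['title_of_project']}

    for new_name, info in aliases.items():
        for dataset, old_name in info.items():
            assert dataset in ontology
            assert old_name in ontology[dataset]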
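Finally, test_format.py exercises get_es_mapping for every dataset/endpoint pair. The
new directory layout (mappings/defaults/defaults.json, mappings/datasets/<dataset>_mapping.json,
mappings/endpoints/<endpoint>/<dataset>_mapping.json) suggests a defaults < dataset <
endpoint override order; the following is a rough sketch of such a merge under that
assumption, not the actual orm_utils implementation::

    import json

    def deep_update(base, override):
        """Recursively overlay `override` onto `base`: nested dicts are
        merged, any other value is replaced outright."""
        for key, value in override.items():
            if isinstance(value, dict) and isinstance(base.get(key), dict):
                deep_update(base[key], value)
            else:
                base[key] = value
        return base

    def build_mapping(dataset, endpoint,
                      root='nesta/core/schemas/tier_1/mappings'):
        mapping = {}
        # Apply layers from most generic to most specific
        for path in (f'{root}/defaults/defaults.json',
                     f'{root}/datasets/{dataset}_mapping.json',
                     f'{root}/endpoints/{endpoint}/{dataset}_mapping.json'):
            try:
                with open(path) as f:
                    deep_update(mapping, json.load(f))
            except FileNotFoundError:  # e.g. a non-existent endpoint
                pass
        return mapping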