Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Commit

Permalink
Testing speedups/robustness (#2692)
Browse files Browse the repository at this point in the history
* Crank up the parallelism.

* Tweak.

* Rofl [long]

* Speed up test_beamsearch_blocking.

* Speed up test_beamsearch_contextblocking

* Try smarter parallelism.

* .

* .2

* .3

* Fix conftest.

* Lint.
  • Loading branch information
stephenroller authored May 29, 2020
1 parent 9f77063 commit 936d1af
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 157 deletions.
22 changes: 14 additions & 8 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@ standard_cpu38: &standard_cpu38
- image: circleci/python:3.8.0-buster-node
environment:
PYTHONUNBUFFERED: 1
resource_class: large
resource_class: xlarge

standard_cpu37: &standard_cpu37
docker:
- image: circleci/python:3.7.5-buster-node
environment:
PYTHONUNBUFFERED: 1
resource_class: large
resource_class: xlarge

standard_cpu36: &standard_cpu36
docker:
- image: circleci/python:3.6.9-buster-node
environment:
PYTHONUNBUFFERED: 1
resource_class: large
resource_class: xlarge

osx_cpu37: &osx_cpu37
macos:
Expand Down Expand Up @@ -166,6 +166,7 @@ jobs:
- "~/venv/lib"
- run:
name: Unit tests (OSX)
no_output_timeout: 60m
command: coverage run -m pytest --junitxml=test-results/junit.xml -m unit -v
- <<: *codecov
- store_test_results:
Expand All @@ -174,7 +175,7 @@ jobs:
unittests_36:
<<: *standard_cpu36
working_directory: ~/ParlAI
parallelism: 8
parallelism: 16
steps:
- checkout
- run:
Expand All @@ -193,6 +194,7 @@ jobs:
- "~/venv/lib"
- run:
name: Unit tests (py36)
no_output_timeout: 60m
command: coverage run -m pytest --junitxml=test-results/junit.xml -m unit -v
- <<: *codecov
- store_test_results:
Expand All @@ -201,7 +203,7 @@ jobs:
unittests_38:
<<: *standard_cpu38
working_directory: ~/ParlAI
parallelism: 8
parallelism: 16
steps:
- checkout
- <<: *fixgit
Expand All @@ -217,6 +219,7 @@ jobs:
- "~/venv/lib"
- run:
name: Unit tests (py38)
no_output_timeout: 60m
command: coverage run -m pytest --junitxml=test-results/junit.xml -m unit -v
- <<: *codecov
- store_test_results:
Expand All @@ -225,7 +228,7 @@ jobs:
unittests_37:
<<: *standard_cpu37
working_directory: ~/ParlAI
parallelism: 8
parallelism: 16
steps:
- checkout
- <<: *fixgit
Expand All @@ -241,6 +244,7 @@ jobs:
- "~/venv/lib"
- run:
name: Unit tests (py37)
no_output_timeout: 60m
command: coverage run -m pytest --junitxml=test-results/junit.xml -m unit -v
- <<: *codecov
- store_test_results:
Expand All @@ -249,7 +253,7 @@ jobs:
unittests_gpu13:
<<: *gpu
working_directory: ~/ParlAI
parallelism: 8
parallelism: 16
steps:
- checkout
- <<: *fixgit
Expand All @@ -272,6 +276,7 @@ jobs:
- "~/venv/lib"
- run:
name: Unit tests (GPU; pytorch 1.3)
no_output_timeout: 60m
command: coverage run -m pytest --junitxml=test-results/junit.xml -m unit -v
- <<: *codecov
- store_test_results:
Expand All @@ -280,7 +285,7 @@ jobs:
unittests_gpu14:
<<: *gpu
working_directory: ~/ParlAI
parallelism: 8
parallelism: 16
steps:
- checkout
- <<: *fixgit
Expand All @@ -303,6 +308,7 @@ jobs:
- "~/venv/lib"
- run:
name: Unit tests (GPU; pytorch 1.4)
no_output_timeout: 60m
command: coverage run -m pytest --junitxml=test-results/junit.xml -m unit -v
- <<: *codecov
- store_test_results:
Expand Down
121 changes: 86 additions & 35 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,51 +13,102 @@

import os
import pathlib
import random
from pytest import ExitCode
import collections
import pytest
import subprocess


# TODO: rename the folders so they make more sense, maybe even with
# a 1:1 correspondence with the circleci job names


def pytest_collection_modifyitems(config, items):
# handle circleci parallelism
if 'CIRCLE_NODE_TOTAL' in os.environ:
total = int(os.environ['CIRCLE_NODE_TOTAL'])
index = int(os.environ['CIRCLE_NODE_INDEX'])
# -----------------------------------------------------------------------
# From https://github.com/ryanwilsonperkin/pytest-circleci-parallelized.
# MIT licensed, Copyright Ryan Wilson-Perkin.
# -----------------------------------------------------------------------
def get_class_name(item):
    """
    Return "module.ClassName" for a test item, or just the module name when
    the test is a bare function defined outside any class.

    Used to group tests so CircleCI can split them by per-class timings.
    """
    class_name, module_name = None, None
    # listchain() is root-first, so reverse it to walk from the item upward
    # and capture the nearest enclosing class and module.
    for parent in reversed(item.listchain()):
        if isinstance(parent, pytest.Class):
            class_name = parent.name
        elif isinstance(parent, pytest.Module):
            module_name = parent.module.__name__
            break

    if class_name:
        return "{}.{}".format(module_name, class_name)
    else:
        return module_name


def filter_tests_with_circleci(test_list):
    """
    Hand the given test/class names to the CircleCI CLI and return the subset
    assigned to this node, split by historical timings.
    """
    cli_args = [
        "circleci",
        "tests",
        "split",
        "--split-by=timings",
        "--timings-type=classname",
    ]
    proc = subprocess.Popen(
        cli_args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    # feed one name per line on stdin; the CLI echoes back this node's share
    raw_output, _ = proc.communicate("\n".join(test_list).encode("utf-8"))
    decoded = raw_output.decode("utf-8").strip()
    return [name.strip() for name in decoded.split("\n")]


# -----------------------------------------------------------------------
# Ordered (path substring, pytest marker) rules; the first substring that
# matches a test's path relative to the repo root decides its marker (see
# pytest_collection_modifyitems). Tests matching no rule are marked "unit".
MARKER_RULES = [
    # NOTE(review): the previous categorization marked parlai_internal tests
    # as "internal"; confirm 'test' is the intended marker name here.
    ('parlai_internal', 'test'),
    ('nightly/gpu', 'nightly_gpu'),
    ('nightly/cpu/', 'nightly_cpu'),
    ('datatests/', 'data'),
    ('tasks/', 'tasks'),
    ('parlai/mturk/core/test/', 'mturk'),
]


def pytest_collection_modifyitems(config, items):
    """
    Categorize tests and distribute them across CircleCI nodes.

    Adds a marker to every collected test based on its file path (see
    MARKER_RULES), deselects tests that don't match the ``-m`` marker
    expression, and, when running under CircleCI, keeps only this node's
    timing-balanced share of the remaining tests.
    """
    marker_expr = config.getoption('markexpr')

    deselected = []

    # first add all the markers, possibly filtering
    # python 3.4/3.5 compat: rootdir = pathlib.Path(str(config.rootdir))
    rootdir = pathlib.Path(config.rootdir)
    for item in items:
        rel_path = str(pathlib.Path(item.fspath).relative_to(rootdir))
        for file_pattern, marker in MARKER_RULES:
            if file_pattern in rel_path:
                item.add_marker(marker)
                if marker_expr and marker != marker_expr:
                    deselected.append(item)
                break
        else:
            # no rule matched: this must be a file directly in the top-level
            # tests/ folder (rel_path[6:] presumably strips the "tests/"
            # prefix — TODO confirm); those are the plain unit tests
            assert "/" not in rel_path[6:], f"Couldn't categorize '{rel_path}'"
            item.add_marker("unit")
            if marker_expr != 'unit' and marker_expr != '':
                deselected.append(item)

    # kill everything that wasn't grabbed
    for item in deselected:
        items.remove(item)

    if 'CIRCLE_NODE_TOTAL' in os.environ:
        # circleci: split up the parallelism by classes so the CLI can
        # balance nodes using per-class timing data
        class_mapping = collections.defaultdict(list)
        for item in items:
            class_name = get_class_name(item)
            class_mapping[class_name].append(item)

        filtered_tests = filter_tests_with_circleci(class_mapping.keys())
        new_items = []
        for name in filtered_tests:
            new_items.extend(class_mapping[name])
        # mutate in place: pytest hands us the live list of collected items
        items[:] = new_items


def pytest_sessionfinish(session, exitstatus):
    """
    Treat "no tests collected" as a successful run.

    This can sometimes happen due to the way we distribute tests across multiple circle
    nodes.
    """
    if exitstatus == pytest.ExitCode.NO_TESTS_COLLECTED:
        session.exitstatus = pytest.ExitCode.OK
11 changes: 9 additions & 2 deletions parlai/zoo/unittest/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,24 @@
Pretrained models used by unit tests.
"""

from parlai.core.build_data import download_models
import os
from parlai.core.build_data import download_models, built, get_model_dir


def download(datapath):
    """
    Download the pretrained models used by the unit tests.

    Skips the download entirely when the model directory already carries the
    current version stamp.

    :param datapath: root ParlAI data directory the models are stored under
    """
    opt = {'datapath': datapath}
    model_name = 'unittest'
    mdir = os.path.join(get_model_dir(datapath), model_name)
    version = 'v6.1'
    model_filenames = [
        'seq2seq.tar.gz',
        'transformer_ranker.tar.gz',
        'transformer_generator2.tar.gz',
        'memnn.tar.gz',
        'apex_v1.tar.gz',
        'test_bytelevel_bpe_v2.tar.gz',
        'beam_blocking1.tar.gz',
        'context_blocking1.tar.gz',
    ]
    # only hit the network when the version stamp is missing or outdated
    if not built(mdir, version):
        download_models(opt, model_filenames, model_name, version=version)
20 changes: 6 additions & 14 deletions tests/test_tra.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ def _get_args(self):
return dict(
task='integration_tests:candidate',
optimizer='adamax',
candidates='batch',
learningrate=7e-3,
batchsize=16,
embedding_size=32,
embedding_size=16,
num_epochs=4,
gradient_clip=0.0,
)

def _get_threshold(self):
Expand Down Expand Up @@ -149,13 +151,7 @@ def test_eval_vocab(self):
class TestTransformerRanker(_AbstractTRATest):
def _get_args(self):
    """Transformer-ranker-specific overrides on top of the shared TRA args."""
    args = super()._get_args()
    new_args = dict(model='transformer/ranker', n_layers=1, n_heads=4, ffn_size=32)
    args.update(new_args)
    return args
Expand All @@ -176,11 +172,7 @@ class TestPolyRanker(_AbstractTRATest):
def _get_args(self):
    """Polyencoder-specific overrides on top of the shared TRA args."""
    args = super()._get_args()
    new_args = dict(
        model='transformer/polyencoder', n_layers=1, n_heads=4, ffn_size=32
    )
    args.update(new_args)
    return args
Expand Down Expand Up @@ -228,7 +220,7 @@ def test_eval_fixed_label_not_in_cands(self):
testing_utils.eval_model(args, skip_valid=True)

args['add_label_to_fixed_cands'] = True
valid, test = testing_utils.eval_model(args, skip_valid=True)
_, test = testing_utils.eval_model(args, skip_valid=True)
self.assertGreaterEqual(test['hits@100'], 0.0)


Expand Down
Loading

0 comments on commit 936d1af

Please sign in to comment.