Commit 5af5f03

Add tests
1 parent 8604769 commit 5af5f03

4 files changed: +203, -0 lines changed

requirements.txt

Lines changed: 2 additions & 0 deletions
@@ -1,2 +1,4 @@
+einops==0.6.1
+pytest==7.4.0
 timm==0.9.2
 torch==2.0.1
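The pytest pin added here is what drives the new tests/ package. As a convenience sketch (not part of this commit), the suite can also be launched programmatically, which is equivalent to running pytest tests -q from the repository root:

import sys

import pytest

# Run the new test package programmatically; pytest.main accepts the same
# arguments as the pytest command line and returns an exit code.
if __name__ == "__main__":
    sys.exit(pytest.main(["tests", "-q"]))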

tests/__init__.py

Whitespace-only changes.

tests/test_softmoe.py

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
import random
from functools import partial

import pytest
import torch
from einops import rearrange
from timm.models.vision_transformer import Attention
from torch import nn

from soft_moe_pytorch import SoftMoELayerWrapper
from soft_moe_pytorch.soft_moe import softmax


def test_softmax():
    """
    Test between custom multi-dim softmax and naive impl.
    """
    for _ in range(20):
        # Single-dim
        x = torch.randn(2, 10, 10)
        y1 = softmax(x, dim=-1)
        y2 = torch.softmax(x, dim=-1)
        assert y1.size() == y2.size()
        assert torch.all(torch.isclose(y1, y2))

        # Multi-dim
        x = torch.randn(2, 10, 10, 10)
        y1 = softmax(x, dim=(2, 3))
        y2 = rearrange(
            x.flatten(start_dim=2).softmax(dim=-1), "b m (n p) -> b m n p", n=10
        )
        assert y1.size() == y2.size()
        assert torch.all(torch.isclose(y1, y2))


def test_soft_moe_layer_forward():
    """
    Test forward with different layers
    """
    for num_experts in [1, 4]:
        for slots_per_experts in [1, 2]:
            for dim in [16, 128]:
                f = SoftMoELayerWrapper(
                    dim=dim,
                    slots_per_expert=slots_per_experts,
                    num_experts=num_experts,
                    layer=nn.Linear,
                    in_features=dim,
                    out_features=32,
                )
                n = random.randint(1, 128)
                inp = torch.randn(1, n, dim)
                out = f(inp)
                assert list(out.shape) == [1, n, 32]
                assert not torch.isnan(out).any(), "Output included NaNs"

    for num_experts in [1, 4]:
        for slots_per_experts in [1, 2]:
            for dim in [16, 128]:
                f = SoftMoELayerWrapper(
                    dim=dim,
                    slots_per_expert=slots_per_experts,
                    num_experts=num_experts,
                    layer=partial(Attention, dim=dim),
                )
                n = random.randint(1, 128)
                inp = torch.randn(1, n, dim)
                out = f(inp)
                assert list(out.shape) == [1, n, dim]
                assert not torch.isnan(out).any(), "Output included NaNs"


def test_soft_moe_layer_input_wrong_features_channels():
    """
    Test for error when input has wrong feature dim
    """
    f = SoftMoELayerWrapper(
        dim=128,
        slots_per_expert=1,
        num_experts=16,
        layer=nn.Linear,
        in_features=128,
        out_features=32,
    )

    with pytest.raises(AssertionError):
        inp = torch.randn(1, 16, 64)
        f(inp)


def test_soft_moe_layer_input_wrong_dim():
    """
    Test for error when input is not 3-dim
    """
    f = SoftMoELayerWrapper(
        dim=128,
        slots_per_expert=1,
        num_experts=16,
        layer=nn.Linear,
        in_features=128,
        out_features=32,
    )

    with pytest.raises(AssertionError):
        inp = torch.randn(1, 16, 64, 64)
        f(inp)

    with pytest.raises(AssertionError):
        inp = torch.randn(1, 16)
        f(inp)
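For context on test_softmax above: the custom softmax is checked against a flatten-then-reshape baseline, so a minimal sketch of an equivalent multi-dim softmax (an illustrative reimplementation, not necessarily what soft_moe_pytorch.soft_moe.softmax does internally) looks like this:

import torch


def multi_dim_softmax(x: torch.Tensor, dim) -> torch.Tensor:
    # Softmax jointly over several dims: move them to the end, flatten,
    # apply a regular softmax, then restore the original layout.
    if isinstance(dim, int):
        return torch.softmax(x, dim=dim)
    dims = [d % x.ndim for d in dim]
    other = [d for d in range(x.ndim) if d not in dims]
    perm = other + dims
    flat = x.permute(*perm).reshape(*[x.shape[d] for d in other], -1)
    out = flat.softmax(dim=-1).reshape([x.shape[d] for d in perm])
    # Invert the permutation so the output has the input's shape.
    inv = [perm.index(i) for i in range(x.ndim)]
    return out.permute(*inv)

The layer tests also document the wrapper's calling convention: extra keyword arguments are forwarded to the expert layer, and the output keeps the token dimension of a (batch, tokens, dim) input. A standalone usage sketch based on those tests (dimensions chosen arbitrarily):

import torch
from torch import nn

from soft_moe_pytorch import SoftMoELayerWrapper

# Constructor pattern exercised in the tests above: `layer` is the expert
# class and the remaining kwargs (in_features, out_features) are passed to it.
moe = SoftMoELayerWrapper(
    dim=128,
    num_experts=4,
    slots_per_expert=2,
    layer=nn.Linear,
    in_features=128,
    out_features=128,
)

x = torch.randn(1, 196, 128)  # (batch, tokens, dim)
y = moe(x)                    # -> (1, 196, 128), matching the forward test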

tests/test_vision_transformer.py

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
import pytest
import torch

from soft_moe_pytorch import (soft_moe_vit_base, soft_moe_vit_huge,
                              soft_moe_vit_large, soft_moe_vit_small,
                              soft_moe_vit_tiny)


@pytest.mark.parametrize(
    "model",
    [soft_moe_vit_tiny],
    # [soft_moe_vit_tiny, soft_moe_vit_small, soft_moe_vit_base, soft_moe_vit_large, soft_moe_vit_huge],
)
def test_soft_moe_vit_forward(model):
    """
    Test network forward pass
    """
    for image_size in [128, 224]:
        for in_chans in [1, 3]:
            net = model(
                img_size=image_size,
                in_chans=in_chans,
                num_classes=10,
            )
            net.eval()

            inp = torch.randn(1, in_chans, image_size, image_size)
            out = net(inp)

            assert out.shape[0] == 1
            assert not torch.isnan(out).any(), "Output included NaNs"


@pytest.mark.parametrize(
    "model",
    [soft_moe_vit_tiny],
    # [soft_moe_vit_tiny, soft_moe_vit_small, soft_moe_vit_base, soft_moe_vit_large, soft_moe_vit_huge],
)
def test_soft_moe_vit_backward(model):
    """
    Test network backward pass
    """
    image_size = 224
    num_classes = 10

    net = model(img_size=image_size, num_classes=num_classes)
    num_params = sum([x.numel() for x in net.parameters()])
    net.train()

    inp = torch.randn(1, 3, image_size, image_size)
    out = net(inp)

    out.mean().backward()
    for n, x in net.named_parameters():
        assert x.grad is not None, f"No gradient for {n}"
    num_grad = sum([x.grad.numel() for x in net.parameters() if x.grad is not None])

    assert out.shape[-1] == num_classes
    assert num_params == num_grad, "Some parameters are missing gradients"
    assert not torch.isnan(out).any(), "Output included NaNs"


@pytest.mark.parametrize(
    "model",
    [soft_moe_vit_tiny],
    # [soft_moe_vit_tiny, soft_moe_vit_small, soft_moe_vit_base, soft_moe_vit_large, soft_moe_vit_huge],
)
def test_soft_moe_vit_forward_num_experts(model):
    """
    Test network soft-moe arguments
    """
    image_size = 224
    in_chans = 3
    for num_experts in [1, 4]:
        for slots_per_experts in [1, 2]:
            for moe_layer_index in [6, [0, 2, 10]]:
                net = model(
                    img_size=image_size,
                    in_chans=in_chans,
                    num_classes=10,
                    num_experts=num_experts,
                    slots_per_expert=slots_per_experts,
                    moe_layer_index=moe_layer_index,
                )
                net.eval()

                inp = torch.randn(1, in_chans, image_size, image_size)
                out = net(inp)

                assert out.shape[0] == 1
                assert not torch.isnan(out).any(), "Output included NaNs"
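The arguments exercised in this file translate directly into a standalone usage sketch (values picked from the tests; moe_layer_index accepts either a single block index or a list of indices, as parametrized above):

import torch

from soft_moe_pytorch import soft_moe_vit_tiny

# Configuration mirroring test_soft_moe_vit_forward_num_experts above.
net = soft_moe_vit_tiny(
    img_size=224,
    in_chans=3,
    num_classes=10,
    num_experts=4,
    slots_per_expert=2,
    moe_layer_index=6,  # or a list of block indices such as [0, 2, 10]
)
net.eval()

with torch.no_grad():
    logits = net(torch.randn(1, 3, 224, 224))

print(logits.shape)  # (1, 10), per the forward/backward shape assertions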
