-
Notifications
You must be signed in to change notification settings - Fork 459
110 lines (107 loc) · 4.69 KB
/
unittest_ci.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# This workflow installs nightly PyTorch and FBGEMM-GPU wheels, builds a wheel from setup.py, and runs the pytest suite across a matrix of Python and CUDA versions.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
# Display name of this workflow.
name: Unit Test CI

# Triggers: any push to the `nightly` branch, or a manual run
# (workflow_dispatch).
on:
  push:
    branches: [nightly]
  workflow_dispatch:
jobs:
  # build_test: for every (Python, CUDA) combination listed in the matrix,
  # create a conda env, install nightly torch and fbgemm-gpu from the matching
  # CUDA wheel index, build a wheel via setup.py, smoke-test imports, then run
  # pytest over the `torchrec` directory with a fixed ignore list.
  build_test:
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        # Explicit entries (rather than a cross product) because python-tag
        # must be paired with its python-version.
        # Versions are quoted so YAML never reads them as floats
        # (unquoted 3.10 would become 3.1).
        include:
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.9'
            python-tag: "py39"
            cuda-tag: "cu118"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.9'
            python-tag: "py39"
            cuda-tag: "cu121"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.9'
            python-tag: "py39"
            cuda-tag: "cu124"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.10'
            python-tag: "py310"
            cuda-tag: "cu118"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.10'
            python-tag: "py310"
            cuda-tag: "cu121"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.10'
            python-tag: "py310"
            cuda-tag: "cu124"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.11'
            python-tag: "py311"
            cuda-tag: "cu118"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.11'
            python-tag: "py311"
            cuda-tag: "cu121"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.11'
            python-tag: "py311"
            cuda-tag: "cu124"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.12'
            python-tag: "py312"
            cuda-tag: "cu118"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.12'
            python-tag: "py312"
            cuda-tag: "cu121"
          - os: linux.g5.12xlarge.nvidia.gpu
            python-version: '3.12'
            python-tag: "py312"
            cuda-tag: "cu124"
    # Reusable workflow from pytorch/test-infra; it receives the runner label,
    # a timeout and the shell script below.
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: ${{ matrix.os }}
      # NOTE(review): presumably minutes; confirm against linux_job.yml inputs.
      timeout: 30
      script: |
        ldd --version
        conda create -y --name build_binary python=${{ matrix.python-version }}
        conda info
        python --version
        conda run -n build_binary python --version
        # Nightly torch from the wheel index matching this entry's CUDA tag.
        conda run -n build_binary \
          pip install torch --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
        conda run -n build_binary \
          python -c "import torch"
        echo "torch succeeded"
        conda run -n build_binary \
          python -c "import torch.distributed"
        echo "torch.distributed succeeded"
        # Nightly fbgemm-gpu from the same CUDA-specific index.
        conda run -n build_binary \
          pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
        conda run -n build_binary \
          python -c "import fbgemm_gpu"
        echo "fbgemm_gpu succeeded"
        conda run -n build_binary \
          pip install -r requirements.txt
        # NOTE(review): the wheel is built but never pip-installed here, so the
        # import below presumably resolves torchrec from the working
        # directory; confirm this is intended.
        conda run -n build_binary \
          python setup.py bdist_wheel \
          --python-tag=${{ matrix.python-tag }}
        conda run -n build_binary \
          python -c "import torchrec"
        echo "torchrec succeeded"
        conda run -n build_binary \
          python -c "import numpy"
        echo "numpy succeeded"
        conda install -n build_binary -y pytest
        # Run the suite, skipping the files and globs listed below. Every glob
        # is quoted so the shell passes it to pytest unexpanded.
        conda run -n build_binary \
          python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors \
          --ignore=torchrec/distributed/tests/test_comm.py --ignore=torchrec/distributed/tests/test_infer_shardings.py \
          --ignore=torchrec/distributed/tests/test_keyed_jagged_tensor_pool.py --ignore=torchrec/distributed/tests/test_pt2_multiprocess.py \
          --ignore=torchrec/distributed/tests/test_pt2.py --ignore=torchrec/distributed/tests/test_quant_model_parallel.py \
          --ignore=torchrec/distributed/tests/test_quant_pruning.py --ignore=torchrec/distributed/tests/test_quant_sequence_model_parallel.py \
          --ignore-glob='torchrec/metrics/*' --ignore-glob='torchrec/distributed/tests/test_model_parallel_gloo*' \
          --ignore-glob='torchrec/inference/inference_legacy/tests*' --ignore-glob='*test_model_parallel_nccl*' \
          --ignore=torchrec/distributed/tests/test_cache_prefetch.py --ignore=torchrec/distributed/tests/test_fp_embeddingbag_single_rank.py \
          --ignore=torchrec/distributed/tests/test_infer_utils.py --ignore=torchrec/distributed/tests/test_fx_jit.py --ignore-glob='**/test_utils/' \
          --ignore-glob='*test_train_pipeline*' --ignore=torchrec/distributed/tests/test_model_parallel_hierarchical.py