-
Notifications
You must be signed in to change notification settings - Fork 453
215 lines (210 loc) · 7.11 KB
/
release_build.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Push Binary Release
on:
workflow_call:
secrets:
PYPI_TOKEN:
required: false
workflow_dispatch:
jobs:
# build on cpu hosts and upload to GHA
build_on_cpu:
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: linux.2xlarge
python-version: 3.9
python-tag: "py39"
cuda-tag: "cu124"
- os: linux.2xlarge
python-version: '3.10'
python-tag: "py310"
cuda-tag: "cu124"
- os: linux.2xlarge
python-version: '3.11'
python-tag: "py311"
cuda-tag: "cu124"
- os: linux.2xlarge
python-version: '3.12'
python-tag: "py312"
cuda-tag: "cu124"
steps:
# Checkout the repository to the GitHub Actions runner
- name: Check ldd --version
run: ldd --version
- name: Checkout
uses: actions/checkout@v4
- name: Update pip
run: |
sudo yum update -y
sudo yum -y install git python3-pip
- name: Setup conda
run: |
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p $HOME/miniconda -u
- name: setup Path
run: |
echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
echo "CONDA=/home/ec2-user/miniconda" >> $GITHUB_PATH
- name: create conda env
run: |
conda create --name build_binary python=${{ matrix.python-version }}
conda info
- name: check python version no Conda
run: |
python --version
- name: check python version
run: |
conda run -n build_binary python --version
- name: Install C/C++ compilers
run: |
sudo yum install -y gcc gcc-c++
- name: Install PyTorch and CUDA
shell: bash
run: |
conda run -n build_binary pip install torch
- name: Install fbgemm
shell: bash
run: |
conda run -n build_binary pip install numpy
conda run -n build_binary pip install fbgemm-gpu
- name: Install Dependencies
shell: bash
run: |
conda run -n build_binary python -m pip install -r requirements.txt
- name: Test Installation of dependencies
run: |
conda run -n build_binary python -c "import torch.distributed"
echo "torch.distributed succeeded"
conda run -n build_binary python -c "import skbuild"
echo "skbuild succeeded"
conda run -n build_binary python -c "import numpy"
echo "numpy succeeded"
# for the conda run with quotes, we have to use "\" and double quotes
# here is the issue: https://github.com/conda/conda/issues/10972
- name: Build TorchRec
env:
OFFICIAL_RELEASE: 1
run: |
rm -r dist || true
conda run -n build_binary \
python setup.py bdist_wheel \
--python-tag=${{ matrix.python-tag }}
- name: Upload wheel as GHA artifact
uses: actions/upload-artifact@v4
with:
name: torchrec_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl
path: dist/torchrec-*.whl
# download from GHA, sanity check on gpu and push to pypi
sanity_check_on_gpu_and_push:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [linux.g5.12xlarge.nvidia.gpu]
python-version: [3.9, "3.10", "3.11", "3.12"]
cuda-tag: ["cu124"]
needs: build_on_cpu
# the glibc version should match the version of the one we used to build the binary
# for this case, it's 2.26
steps:
- name: Check ldd --version
run: ldd --version
- name: check cpu info
shell: bash
run: |
cat /proc/cpuinfo
- name: check distribution info
shell: bash
run: |
cat /proc/version
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: check gpu info
shell: bash
run: |
sudo yum install lshw -y
sudo lshw -C display
# Checkout the repository to the GitHub Actions runner
- name: Checkout
uses: actions/checkout@v4
- name: Update pip
run: |
sudo yum update -y
sudo yum -y install git python3-pip
- name: Setup conda
run: |
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p $HOME/miniconda
- name: setup Path
run: |
echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
echo "CONDA=/home/ec2-user/miniconda" >> $GITHUB_PATH
- name: create conda env
run: |
conda create --name build_binary python=${{ matrix.python-version }}
conda info
- name: check python version no Conda
run: |
python --version
- name: check python version
run: |
conda run -n build_binary python --version
- name: Install C/C++ compilers
run: |
sudo yum install -y gcc gcc-c++
- name: Install PyTorch and CUDA
shell: bash
run: |
conda run -n build_binary pip install torch
# download wheel from GHA
- name: Install fbgemm
shell: bash
run: |
conda run -n build_binary pip install numpy
conda run -n build_binary pip install fbgemm-gpu
- name: Install torchmetrics
shell: bash
run: |
conda run -n build_binary pip install torchmetrics==1.0.3
- name: Download wheel
uses: actions/download-artifact@v4
with:
name: torchrec_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl
- name: Display structure of downloaded files
run: ls -R
- name: Install TorchRec
run: |
rm -r dist || true
conda run -n build_binary python -m pip install *.whl
- name: Test fbgemm_gpu and torchrec installation
shell: bash
run: |
conda run -n build_binary \
python -c "import fbgemm_gpu"
conda run -n build_binary \
python -c "import torchrec"
# Push to Pypi
- name: Push TorchRec Binary to PYPI
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: |
conda run -n build_binary python -m pip install twine
conda run -n build_binary \
python -m twine upload \
--username __token__ \
--password "$PYPI_TOKEN" \
--skip-existing \
torchrec-*.whl