-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ADLR/megatron-lm!2113 - build: Use multi-stage for parallel builds
- Loading branch information
Showing
2 changed files
with
80 additions
and
112 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,82 +1,54 @@ | ||
# syntax=docker/dockerfile:experimental | ||
# syntax=docker/dockerfile:1.3-labs | ||
|
||
ARG FROM_IMAGE_NAME | ||
FROM $FROM_IMAGE_NAME as main | ||
ENV DEBIAN_FRONTEND=noninteractive | ||
|
||
RUN sed -i -e 's/^APT/# APT/' -e 's/^DPkg/# DPkg/' \ | ||
/etc/apt/apt.conf.d/docker-clean | ||
|
||
RUN apt-get update && \ | ||
apt-get install -y --no-install-recommends gettext && \ | ||
apt-get clean | ||
|
||
RUN wget https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 -O /usr/local/bin/yq && \ | ||
chmod a+x /usr/local/bin/yq | ||
|
||
##### For Mamba begin ##### | ||
RUN pip uninstall -y triton && \ | ||
pip install triton==2.1.0 | ||
FROM $FROM_IMAGE_NAME as build_causal_conv1d | ||
WORKDIR /opt | ||
RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip3 wheel -v git+https://github.com/Dao-AILab/causal-conv1d.git@v1.2.2.post1 | ||
|
||
# The causal-conv1d and mamba-ssm packages below are built from scratch here | ||
# (which takes significant time) because there are no wheels available on PyPI | ||
# for these relatively newer versions of the packages that are compatible with | ||
# the older NGC-variant PyTorch version (e.g. version 2.2.0.dev231106) that we | ||
# are using (in the NGC base container). Generally, if the package is not | ||
# compatible with the PyTorch version, then it will generate a Python import | ||
# error. The package authors tend to only release wheels for new versions of | ||
# these packages which are compatible with the versions of regular PyTorch and | ||
# NGC-variant PyTorch that are newer at the time of release. So, to use newer | ||
# versions of these packages with relatively older versions of the NGC PyTorch | ||
# container, we tend to have to build the packages from scratch. | ||
FROM $FROM_IMAGE_NAME as build_grouped_gemm | ||
WORKDIR /opt | ||
RUN pip3 wheel -v git+https://github.com/fanshiqing/grouped_gemm@v1.1.2 | ||
|
||
RUN cd /tmp && \ | ||
pip uninstall -y causal-conv1d && \ | ||
git clone https://github.com/Dao-AILab/causal-conv1d.git && \ | ||
cd causal-conv1d && \ | ||
git checkout v1.2.2.post1 && \ | ||
CAUSAL_CONV1D_FORCE_BUILD=TRUE pip install . && \ | ||
cd .. && \ | ||
rm -rf causal-conv1d | ||
FROM $FROM_IMAGE_NAME as build_mamba_ssm | ||
WORKDIR /opt | ||
RUN MAMBA_FORCE_BUILD=TRUE pip3 wheel -v git+https://github.com/state-spaces/mamba.git@v2.0.3 | ||
|
||
RUN cd /tmp && \ | ||
pip uninstall -y mamba-ssm && \ | ||
git clone https://github.com/state-spaces/mamba.git && \ | ||
cd mamba && \ | ||
git checkout v2.0.3 && \ | ||
MAMBA_FORCE_BUILD=TRUE pip install . && \ | ||
cd .. && \ | ||
rm -rf mamba | ||
##### For Mamba end ##### | ||
|
||
##### For JET-API start ##### | ||
RUN apt-get update && \ | ||
apt-get install -y python3-venv && \ | ||
apt-get clean -y && \ | ||
python -m venv /opt/jet | ||
##### For JET-API end ##### | ||
|
||
RUN pip3 install --no-cache-dir \ | ||
einops \ | ||
flask-restful \ | ||
nltk \ | ||
pytest \ | ||
pytest-cov \ | ||
pytest_mock \ | ||
pytest-random-order \ | ||
sentencepiece \ | ||
wrapt \ | ||
git+https://github.com/fanshiqing/grouped_gemm@v1.1.2 \ | ||
zarr \ | ||
tensorstore==0.1.45 \ | ||
wandb | ||
|
||
COPY . /workspace/megatron-lm | ||
|
||
COPY . /workspace/megatron-lm | ||
RUN cp -r /workspace/megatron-lm /opt && \ | ||
pip install /opt/megatron-lm | ||
FROM $FROM_IMAGE_NAME as main | ||
ENV DEBIAN_FRONTEND=noninteractive | ||
|
||
RUN apt-get update && \ | ||
apt-get install -y --no-install-recommends gettext python3-venv && \ | ||
apt-get clean && \ | ||
python -m venv /opt/jet && \ | ||
wget https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 -O /usr/local/bin/yq && \ | ||
chmod a+x /usr/local/bin/yq | ||
|
||
COPY --from=build_causal_conv1d /opt/causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl ./ | ||
COPY --from=build_grouped_gemm /opt/grouped_gemm-1.1.2-cp310-cp310-linux_x86_64.whl ./ | ||
COPY --from=build_mamba_ssm /opt/mamba_ssm-2.0.3-cp310-cp310-linux_x86_64.whl ./ | ||
|
||
RUN pip3 install --no-cache-dir --upgrade-strategy only-if-needed -v \ | ||
einops \ | ||
flask-restful \ | ||
nltk \ | ||
pytest \ | ||
pytest-cov \ | ||
pytest_mock \ | ||
pytest-random-order \ | ||
sentencepiece \ | ||
wrapt \ | ||
zarr \ | ||
wandb \ | ||
triton==2.1.0 \ | ||
causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl \ | ||
mamba_ssm-2.0.3-cp310-cp310-linux_x86_64.whl \ | ||
grouped_gemm-1.1.2-cp310-cp310-linux_x86_64.whl \ | ||
tensorstore==0.1.45 && \ | ||
rm *.whl | ||
|
||
# Since megatron does not have any dependencies (and isn't a dependency of any other package), we can install it separately to make everything a bit quicker. | ||
COPY . /opt/megatron-lm | ||
RUN pip install /opt/megatron-lm | ||
|
||
##### For NVIDIANS only ##### | ||
FROM main as jet | ||
|