Skip to content

Commit

Permalink
build: Improve caching
Browse files Browse the repository at this point in the history
Signed-off-by: Oliver Koenig <[email protected]>
  • Loading branch information
ko3n1g committed Sep 22, 2024
1 parent 61c351f commit 7f809cb
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.03-py3

FROM ${BASE_IMAGE}


# Number of parallel threads for compute-heavy build jobs.
# If you get errors building TransformerEngine (TE) or Apex, decrease this to 4.
ARG MAX_JOBS=8
Expand All @@ -15,12 +10,19 @@ ARG ALIGNER_COMMIT=main
ARG TRTLLM_VERSION=v0.10.0
ARG PROTOBUF_VERSION=4.24.4

# Base image for the build stage; it is substituted into the FROM line below.
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.03-py3

FROM ${BASE_IMAGE}

# Declared without a value so the stage picks up the MAX_JOBS setting defined
# before FROM (a pre-FROM ARG is otherwise visible only to FROM lines).
ARG MAX_JOBS

# Needed in case git complains that it can't detect a valid email; this email is fake but works.
RUN git config --global user.email "[email protected]"

# Relative paths in the instructions that follow resolve against /opt.
WORKDIR /opt

# install TransformerEngine
ARG TE_TAG
RUN pip uninstall -y transformer-engine && \
git clone https://github.com/NVIDIA/TransformerEngine.git && \
cd TransformerEngine && \
Expand All @@ -32,6 +34,7 @@ RUN pip uninstall -y transformer-engine && \
NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .

# install latest apex
ARG APEX_TAG
RUN pip uninstall -y apex && \
git clone https://github.com/NVIDIA/apex && \
cd apex && \
Expand All @@ -42,11 +45,14 @@ RUN pip uninstall -y apex && \
pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./

# place any util pkgs here
# Each version ARG is declared directly above the RUN that consumes it, so
# changing one version only invalidates the layers from that point onwards.
# --no-cache-dir keeps pip's download cache out of the image layers, matching
# the apex install.
ARG PYTRITON_VERSION
RUN pip install --no-cache-dir --upgrade-strategy only-if-needed nvidia-pytriton==$PYTRITON_VERSION
ARG PROTOBUF_VERSION
# --no-deps: install exactly this protobuf version without touching its dependencies.
RUN pip install --no-cache-dir -U --no-deps protobuf==$PROTOBUF_VERSION
RUN pip install --no-cache-dir --upgrade-strategy only-if-needed jsonlines

# NeMo
ARG NEMO_TAG
RUN git clone https://github.com/NVIDIA/NeMo.git && \
cd NeMo && \
git pull && \
Expand All @@ -59,6 +65,7 @@ RUN git clone https://github.com/NVIDIA/NeMo.git && \
cd nemo/collections/nlp/data/language_modeling/megatron && make

# MLM
ARG MLM_TAG
RUN pip uninstall -y megatron-core && \
git clone https://github.com/NVIDIA/Megatron-LM.git && \
cd Megatron-LM && \
Expand All @@ -70,7 +77,7 @@ RUN pip uninstall -y megatron-core && \
pip install -e .

# NeMo Aligner
ARG APEX_TAG=59b80ee8df79cec125794949327f29913c328746
ARG ALIGNER_COMMIT
RUN git clone https://github.com/NVIDIA/NeMo-Aligner.git && \
cd NeMo-Aligner && \
git pull && \
Expand All @@ -86,6 +93,7 @@ RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.d
git lfs install

# TRTLLM
ARG TRTLLM_VERSION
RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
cd TensorRT-LLM && \
git checkout ${TRTLLM_VERSION} && \
Expand Down

0 comments on commit 7f809cb

Please sign in to comment.