diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6e4009c..094e0fb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -77,6 +77,7 @@ jobs: push: true target: production tags: ${{ steps.meta.outputs.tags }} + build-args: LUSTRE_VERSION=cray-2.15.B19 - name: "Build the final Docker image in nektos/act" # The docker/build-push-action does actions/checkout, and that can't be @@ -152,6 +153,7 @@ jobs: push: true target: debug tags: ${{ steps.meta.outputs.tags }} + build-args: LUSTRE_VERSION=cray-2.15.B19 create_release: needs: production diff --git a/Dockerfile b/Dockerfile index bf60d94..e979063 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,8 +18,7 @@ # These ARGs must be before the first FROM. This allows them to be valid for # use in FROM instructions. ARG MPI_OPERATOR_VERSION=0.6.0 -# See https://www.open-mpi.org/software/ompi/v4.1/ for releases and their -# checkums. +# See https://www.open-mpi.org/software/ompi/v4.1/ for releases and their checksums. 
ARG OPENMPI_VERSION=4.1.7 ARG OPENMPI_MD5=787d2bc8b3db336db97c34236934b3df @@ -27,17 +26,35 @@ FROM mpioperator/openmpi-builder:v$MPI_OPERATOR_VERSION AS builder ARG OPENMPI_VERSION ARG OPENMPI_MD5 +ARG LUSTRE_VERSION ENV OPENMPI_VERSION=$OPENMPI_VERSION ENV OPENMPI_MD5=$OPENMPI_MD5 +ENV LUSTRE_VERSION=$LUSTRE_VERSION RUN apt-get update && apt-get install -y \ ca-certificates \ wget tar make gcc cmake perl libbz2-dev pkg-config openssl libssl-dev libcap-dev \ git libattr1-dev \ openssh-server openssh-client \ + linux-headers-generic \ + libtool libyaml-dev ed libreadline-dev libsnmp-dev mpi-default-dev libselinux-dev libncurses5-dev libncurses-dev \ + bison flex gnupg libelf-dev gcc libssl-dev bc wget bzip2 build-essential udev kmod cpio module-assistant \ + libmount-dev libnl-genl-3-dev \ && rm -rf /var/lib/apt/lists/* \ && update-ca-certificates +# Build lustre to include in mpifileutils +COPY build_lustre.sh /build_lustre.sh +RUN /build_lustre.sh ${LUSTRE_VERSION} + +# Create MPI File Utils dependencies directory +RUN mkdir -p /deps /mfu +WORKDIR /deps + +# Stash the lustre libraries to make it easier to copy out in later stages +RUN mkdir -p /deps/lustre/lib \ + && cp -r /usr/lib/*lustre* /deps/lustre/lib/ + # Remove the OS binary of openmpi and build from source RUN apt-get remove -y openmpi-bin RUN wget --no-check-certificate https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-$OPENMPI_VERSION.tar.gz @@ -45,36 +62,34 @@ RUN [ $(md5sum openmpi-$OPENMPI_VERSION.tar.gz | awk '{print $1}') = "$OPENMPI_M RUN gunzip -c openmpi-$OPENMPI_VERSION.tar.gz | tar xf - \ && cd openmpi-$OPENMPI_VERSION \ && ./configure --prefix=/opt/openmpi-$OPENMPI_VERSION \ - && make all install + && make -j $(nproc) all \ + && make install RUN cp -r /opt/openmpi-$OPENMPI_VERSION/* /usr # Build and install MPI File Utils and all dependencies -RUN mkdir -p /deps /mfu -WORKDIR /deps - RUN wget https://github.com/hpc/libcircle/releases/download/v0.3/libcircle-0.3.0.tar.gz \ && tar 
xfz libcircle-0.3.0.tar.gz \ && cd libcircle-0.3.0 \ && ./configure --prefix=/deps/libcircle/lib \ - && make install + && make -j $(nproc) install RUN wget https://github.com/libarchive/libarchive/releases/download/v3.5.3/libarchive-3.5.3.tar.gz \ && tar xfz libarchive-3.5.3.tar.gz \ && cd libarchive-3.5.3 \ && ./configure --prefix=/deps/libarchive/lib \ - && make install + && make -j $(nproc) install RUN wget https://github.com/llnl/lwgrp/releases/download/v1.0.3/lwgrp-1.0.3.tar.gz \ && tar xfz lwgrp-1.0.3.tar.gz \ && cd lwgrp-1.0.3 \ && ./configure --prefix=/deps/lwgrp/lib \ - && make install + && make -j $(nproc) install RUN wget https://github.com/llnl/dtcmp/releases/download/v1.1.1/dtcmp-1.1.1.tar.gz \ && tar xfz dtcmp-1.1.1.tar.gz \ && cd dtcmp-1.1.1 \ && ./configure --prefix=/deps/dtcmp/lib --with-lwgrp=/deps/lwgrp/lib \ - && make install + && make -j $(nproc) install # Until a new release of mpifileutils is cut, we need to use a tagged commit on # our fork to include our dcp changes (i.e. 
--uid, --gid) @@ -86,6 +101,7 @@ RUN git clone --depth 1 https://github.com/nearnodeflash/mpifileutils.git --bran -DWITH_DTCMP_PREFIX=/deps/dtcmp/lib \ -DWITH_LibArchive_PREFIX=/deps/libarchive/lib \ -DCMAKE_INSTALL_PREFIX=/mfu \ + -DENABLE_LUSTRE=ON \ && make install @@ -104,6 +120,7 @@ RUN cd build \ -DWITH_DTCMP_PREFIX=/deps/dtcmp/lib \ -DWITH_LibArchive_PREFIX=/deps/libarchive/lib \ -DCMAKE_INSTALL_PREFIX=/mfu \ + -DENABLE_LUSTRE=ON \ -DCMAKE_BUILD_TYPE=Debug \ && make install @@ -113,7 +130,8 @@ RUN [ $(md5sum openmpi-$OPENMPI_VERSION.tar.gz | awk '{print $1}') = "$OPENMPI_M RUN gunzip -c openmpi-$OPENMPI_VERSION.tar.gz | tar xf - \ && cd openmpi-$OPENMPI_VERSION \ && ./configure --prefix=/opt/openmpi-$OPENMPI_VERSION-debug --enable-debug \ - && make all install + && make -j $(nproc) all \ + && make install ############################################################################### FROM mpioperator/openmpi:v$MPI_OPERATOR_VERSION AS production @@ -129,12 +147,19 @@ COPY --from=builder /deps/libarchive/lib/ /usr COPY --from=builder /deps/lwgrp/lib/ /usr COPY --from=builder /deps/dtcmp/lib/ /usr +COPY --from=builder /deps/lustre/lib/ /usr/lib + COPY --from=builder /mfu/ /usr RUN apt-get remove -y openmpi-bin COPY --from=builder /opt/openmpi-$OPENMPI_VERSION /opt/openmpi-$OPENMPI_VERSION RUN cp -r /opt/openmpi-$OPENMPI_VERSION/* /usr && rm -rf /openmpi* +# libreadline8 is necessary for dcp with lustre support +RUN apt-get update && apt-get install -y \ + libreadline8 \ + && rm -rf /var/lib/apt/lists/* + # Remove timezone configuration so we can inherit from host RUN rm -rf /etc/timezone && rm -rf /etc/localtime diff --git a/Makefile b/Makefile index e786eef..e8fc5d5 100644 --- a/Makefile +++ b/Makefile @@ -18,24 +18,40 @@ # NOTE: git-version-gen will generate a value for VERSION, unless you override it. IMAGE_TAG_BASE ?= ghcr.io/nearnodeflash/nnf-mfu +# CONTAINER_TOOL defines the container tool to be used for building images. 
+# Be aware that the target commands are only tested with Docker which is +# scaffolded by default. However, you might want to replace it to use other +# tools. (e.g. podman) +CONTAINER_TOOL ?= docker + +# Which tag to pull from the cray lustre source code. +# **Note:** This must also be set in .github/workflows/main.yml for both +# production and debug docker builds. +CRAY_LUSTRE_VERSION ?= cray-2.15.B19 + +# Only support linux/amd64 builds. This reduces complexity with installing +# lustre, which is an option enabled for mpifileutils. +PLATFORM ?= linux/amd64 + docker-build: VERSION ?= $(shell cat .version) docker-build: TARGET ?= production docker-build: .version - docker build --target $(TARGET) -t $(IMAGE_TAG_BASE):$(VERSION) . + ${CONTAINER_TOOL} build --platform=$(PLATFORM) --target $(TARGET) --build-arg LUSTRE_VERSION=${CRAY_LUSTRE_VERSION} -t $(IMAGE_TAG_BASE):$(VERSION) . docker-build-debug: VERSION ?= $(shell cat .version) +docker-build-debug: TARGET ?= debug docker-build-debug: IMAGE_TAG_BASE := $(IMAGE_TAG_BASE)-debug docker-build-debug: .version - docker build --target debug -t $(IMAGE_TAG_BASE):$(VERSION) . + ${CONTAINER_TOOL} build --platform=$(PLATFORM) --target $(TARGET) --build-arg LUSTRE_VERSION=${CRAY_LUSTRE_VERSION} -t $(IMAGE_TAG_BASE):$(VERSION) .
docker-push: VERSION ?= $(shell cat .version) docker-push: .version - docker push $(IMAGE_TAG_BASE):$(VERSION) + ${CONTAINER_TOOL} push $(IMAGE_TAG_BASE):$(VERSION) docker-push-debug: VERSION ?= $(shell cat .version) docker-push-debug: IMAGE_TAG_BASE := $(IMAGE_TAG_BASE)-debug docker-push-debug: .version - docker push $(IMAGE_TAG_BASE):$(VERSION) + ${CONTAINER_TOOL} push $(IMAGE_TAG_BASE):$(VERSION) kind-push: VERSION ?= $(shell cat .version) kind-push: .version @@ -54,4 +70,3 @@ kind-push-debug: .version clean: rm -f .version - diff --git a/build_lustre.sh b/build_lustre.sh new file mode 100755 index 0000000..95b6768 --- /dev/null +++ b/build_lustre.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Build and install the Lustre client from the Cray lustre repository at the tag or branch given as $1. + +set -ex + +if [[ $1 == "" ]]; then + echo "Usage: $0 CRAY_LUSTRE_VERSION" >&2 + exit 1 +fi +CRAY_LUSTRE_VERSION=$1 + +# Determine kernel version and set variables +ls -l /usr/src/ +KERNEL_FLAVOR=$(ls /lib/modules | head -1 | tr '-' '\n' | tail -1) +KERNEL_BASE_VER=$(ls /lib/modules | head -1 | grep -oP '\d+\.\d+\.\d+-\d+') +# LINUX_DIR=$(ls -d -1 /usr/src/linux-headers-${KERNEL_BASE_VER}) +LINUX_DIR=$(ls -d -1 /usr/src/linux-headers-${KERNEL_BASE_VER}-${KERNEL_FLAVOR}) + +# A --depth=1 clone fetches only the default branch tip, so select the ref at clone time. +git clone --depth=1 --branch "$CRAY_LUSTRE_VERSION" https://github.com/Cray/lustre.git +cd lustre +sh autogen.sh +./configure --disable-server --enable-client --disable-tests --enable-mpitests=no \ + --disable-gss-keyring --enable-gss=no \ + --with-linux="${LINUX_DIR}" +make -j "$(nproc || true)" +make install