Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch tegra build to CUDA 12 #5779

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/CUDA_utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ endfunction()
# List of currently used arch values
if (${ARCH} MATCHES "aarch64-")
# aarch64-linux
set(CUDA_known_archs "53" "62" "72" "75" "87")
set(CUDA_known_archs "53" "62" "72" "75" "87" "90a")
elseif (${ARCH} MATCHES "aarch64")
# aarch64 SBSA, only >=Volta
# from the whole list/; "70" "75" "80" "86"
Expand Down
2 changes: 1 addition & 1 deletion dali/pipeline/operator/builtin/input_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ class InputOperator : public Operator<Backend>, virtual public BatchSizeProvider
// data from CPU to GPU. As we keep the order in tl_data_ as internal_copy_stream_, we will use
// an actual stream for running and synchronizing with the copy. Note that the Copy can be truly
// asynchronous if it comes from pinned memory or happens on a device with integrated memory
// (like Xavier) where CPU and GPU share the same memory.
// (like Tegra) where CPU and GPU share the same memory.
if (!order.is_device()) {
order = tl_elm->data.order();
}
Expand Down
55 changes: 30 additions & 25 deletions docker/Dockerfile.build.aarch64-linux
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
ARG AARCH64_BASE_IMAGE=nvidia/cuda:11.8.0-devel-ubuntu20.04
ARG AARCH64_BASE_IMAGE=nvidia/cuda:12.6.3-devel-ubuntu22.04
FROM ${AARCH64_BASE_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive \
CUDA_CROSS_VERSION=11-8 \
CUDA_CROSS_VERSION_DOT=11.8
CUDA_CROSS_VERSION=12-6 \
CUDA_CROSS_VERSION_DOT=12.6

RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
RUN apt-get update && apt-get install -y gnupg ca-certificates wget && \
apt-key del 7fa2af80 && \
gpg --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && \
gpg --export --armor A4B469963BF863CC | tee /etc/apt/trusted.gpg.d/nvidia.asc && \
apt-get update && apt-get install software-properties-common -y --no-install-recommends && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
dpkg -i cuda-keyring_1.1-1_all.deb && \
add-apt-repository 'deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/ /' -y && \
add-apt-repository ppa:deadsnakes/ppa -y && \
apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
libssl-dev \
curl \
wget \
Expand All @@ -31,27 +36,22 @@ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/
python3.10 python3.10-dev \
python3.11 python3.11-dev \
python3.12 python3.12-dev \
python3.13 python3.13-dev && \
apt-key adv --fetch-key http://repo.download.nvidia.com/jetson/jetson-ota-public.asc && \
add-apt-repository 'deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/cross-linux-aarch64/ /' && \
apt-get update && \
apt-get install -y cuda-cudart-cross-aarch64-${CUDA_CROSS_VERSION} \
libcufft-cross-aarch64-${CUDA_CROSS_VERSION} \
libcurand-cross-aarch64-${CUDA_CROSS_VERSION} \
libcusolver-cross-aarch64-${CUDA_CROSS_VERSION} \
libcublas-cross-aarch64-${CUDA_CROSS_VERSION} \
cuda-driver-cross-aarch64-${CUDA_CROSS_VERSION} \
cuda-cccl-cross-aarch64-${CUDA_CROSS_VERSION} \
cuda-nvcc-cross-aarch64-${CUDA_CROSS_VERSION} \
libnpp-cross-aarch64-${CUDA_CROSS_VERSION} \
&& \
python3.13 python3.13-dev \
cuda-cudart-cross-aarch64-${CUDA_CROSS_VERSION} \
libcufft-cross-aarch64-${CUDA_CROSS_VERSION} \
libcurand-cross-aarch64-${CUDA_CROSS_VERSION} \
libcusolver-cross-aarch64-${CUDA_CROSS_VERSION} \
libcublas-cross-aarch64-${CUDA_CROSS_VERSION} \
cuda-driver-cross-aarch64-${CUDA_CROSS_VERSION} \
cuda-cccl-cross-aarch64-${CUDA_CROSS_VERSION} \
cuda-nvcc-cross-aarch64-${CUDA_CROSS_VERSION} \
libnpp-cross-aarch64-${CUDA_CROSS_VERSION} \
libnvjpeg-cross-aarch64-${CUDA_CROSS_VERSION} && \
rm -rf /var/lib/apt/lists/* && \
PYTHON_VER=$(python3 -c "import sys;print(f'{sys.version_info[0]}{sys.version_info[1]}')") && \
if [ "${PYTHON_VER}" = "36" ]; then \
curl -O https://bootstrap.pypa.io/pip/3.6/get-pip.py; \
else \
curl -O https://bootstrap.pypa.io/get-pip.py; \
fi && python3 get-pip.py && rm get-pip.py && \
curl -O https://bootstrap.pypa.io/get-pip.py && \
python3 get-pip.py && \
rm get-pip.py && \
# decouple libclang and clang installation so libclang changes are not overridden by clang
pip install clang==14.0 && pip install libclang==14.0.1 flake8 bandit "black[jupyter]"==24.8.0 && \
rm -rf /root/.cache/pip/ && \
Expand Down Expand Up @@ -108,7 +108,12 @@ RUN export PYVERS="3.8.5 3.9.0 3.10.0 3.11.0 3.12.0 3.13.0" && \
cd / && rm -rf /tmp/Python*; \
done && \
# hack - patch the host pythonX-config to return --extension-suffix for the target
find /usr/ -iname x86_64-linux-gnu-python* -exec sed -i "s/\(SO.*\)\(x86_64\)\(.*\)/\1aarch64\3/" {} \;
for pythonconfig in $(ls /usr/bin/x86_64-linux-gnu-python*-config); do \
sed -i "s/\(SO.*\)\(x86_64\)\(.*\)/\1aarch64\3/" ${pythonconfig}; \
done && \
# avoid fatal error: aarch64-linux-gnu/python3.8/pyconfig.h: No such file or directory
mkdir -p /usr/include/aarch64-linux-gnu/ && \
cp -r /usr/include/x86_64-linux-gnu/python* /usr/include/aarch64-linux-gnu/;

VOLUME /dali

Expand Down
3 changes: 2 additions & 1 deletion docs/support_matrix.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@ Supported NVIDIA hardware, CUDA, OS, and CUDA driver
| - Turing | | see `enhanced CUDA compatibility guide <https://docs.nvidia.com/deploy/cuda-compatibility/index.html#enhanced-compat-minor-releases>`__ | - RHEL 8 | | | | |
| - Volta | | | - and other `PEP599 - The manylinux2014 Platform Tag <https://www.python.org/dev/peps/pep-0599/>`__ compatible | | | | |
+----------------------------------+---------------+-----------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+-------------------------+---------------------+--------------------+---------------------------------------------------------------+
| - Xavier | Not Available | 11.8 | Jetpack 5.0.2 | SM 5.3 and later | Jetpack 5.0.2 | Jetpack 5.0.2 | - :ref:`Python wheel can be build from source <jetson build>` |
| - Tegra | Not Available | - 11.8 (Jetpack 5) | - Jetpack 5.0.2 | SM 5.3 and later | Jetpack 5.0.2 | - Jetpack 5.0.2 | - :ref:`Python wheel can be built from source <jetson build>` |
| | | - 12.2 (Jetpack 6) | - Jetpack 6.0 | | Jetpack 6.0 | - Jetpack 6.0 | |
+----------------------------------+---------------+-----------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+-------------------------+---------------------+--------------------+---------------------------------------------------------------+
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/bin/bash -e

test_py_with_framework() {
# we are not able to easily install this packages in xavier for aarch64 so filter it out
# also there is no nvJPEG on xavier so don't run any test with the ImageDecoder having
# we are not able to easily install these packages in tegra for aarch64 so filter them out
# also there is no nvJPEG on tegra so don't run any test with the ImageDecoder having
# the device explicitly set
EXCLUDE_PACKAGES=(
"scipy"
Expand Down
File renamed without changes.
Loading