Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(docker): Optimize GPU support in container startup scripts #15669

Open
wants to merge 51 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
511a92c
feat(drivers/lidar,localization): support lidar fusion and localizati…
Apr 3, 2023
765d277
fix(perception): support single camera and fix lint
qilinhu Apr 6, 2023
5a64cc0
Merge "fix(perception): support single camera and fix lint" into r8.0.0
qilinhu Apr 11, 2023
e8816c8
fix(drivers/lidar): fix compensator tf failed when using system clock
Apr 11, 2023
b3e6220
fix(docker): change python tools install path to user local
daohu527 Apr 13, 2023
9171259
feat(docker): auto set GEOLOC by check_timezone_cn
daohu527 Apr 13, 2023
f762e76
fix(docker): change default lidar model to cnnseg16
daohu527 Apr 14, 2023
f055720
Fix bug:can not select parking space,and change sendparkingroutingreq…
lj0919 Oct 20, 2022
33d9d73
Fix format
lj0919 Oct 20, 2022
06e4235
Fix select parking space bug
lj0919 Oct 21, 2022
a868049
fix(dreamview): remove unused function for deadend junction
lj0919 May 18, 2023
37ee935
fix(dreamview): fix send parking routing request function
lj0919 May 18, 2023
eb84822
fix(dreamview): fix send parking routing request function:add heading
lj0919 May 18, 2023
2bc947f
fix(dreamview): add dreamveiw frontend build output after fix fe code…
lj0919 May 18, 2023
b482443
fix(dreamview): mistake delete get map data code,recover it and add b…
lj0919 May 18, 2023
265fb94
fix(dreamview): add the v2x button in dev_kit_debug mode
macDure May 30, 2023
4e59675
fix(lint): fix the lint error
macDure May 31, 2023
2897b56
feat(tools): add the localization pose dot in control info
macDure Jun 1, 2023
3cf0f2f
fix(canbus): fix the component init
macDure Mar 1, 2023
178cb4a
feat(perception): fix camera template match error
daohu527 Jun 5, 2023
ee22899
fix(planning): move static const to task config
ad-litianjiao Jun 5, 2023
818ad01
Merge branch 'r8.0.0' of ssh://icode.baidu.com:8235/baidu/apollo-bj/a…
ad-litianjiao Jun 6, 2023
c4d74e9
Merge changes Id5a49ebe,I0a4f6707 into r8.0.0
ad-litianjiao Jun 6, 2023
a1fb570
fix(planning): fix speed noise may cause kappa abnormal
ad-litianjiao Jun 13, 2023
8b08be4
feat(planning): improve open space search
ad-litianjiao Jun 13, 2023
093a1a1
fix:(dreamview) change the calculation method of the distance from ad…
fanyueqiao Jun 13, 2023
055ca9c
fix(planning): adjust parking space end buffer by parameter
ad-litianjiao Jun 19, 2023
5bf129c
fix(planning): fix core dump if no last path
ad-litianjiao Jun 19, 2023
00b3886
fix(planning):open space replan only in fallback
ad-litianjiao Jun 19, 2023
5532836
style(planning): fix lint error
ad-litianjiao Jun 19, 2023
eaf598a
fix(control): fix the reverse path remain when full stop
macDure Jun 19, 2023
1492e56
Merge changes I3e09c2bc,Ib530ff30,I045d7c82,Id89f2ada,Ia137d79a, ... …
ad-litianjiao Jun 19, 2023
94ef0d4
Merge "fix(control): fix the reverse path remain when full stop" int…
macDure Jun 20, 2023
c4685b4
feat(dreamview): add the perception model conf copy rule in vehicle data
macDure Jun 20, 2023
74e42aa
fix(planning): fix stitch trajectory empty if position init failed
ad-litianjiao Jun 20, 2023
1e7cd3f
Merge "fix(planning): fix stitch trajectory empty if position init fa…
ad-litianjiao Jun 20, 2023
7d90bbd
fix(dreamview): fix the bug in calculating the distance from obstacle…
fanyueqiao Jun 28, 2023
0b4cbec
fix(bridge): fix buffer overflow bug
Jul 4, 2023
083df0e
fix(cyber):cyber_monitor no message bug fix
hearto1314 Aug 21, 2023
5a7c5d0
Merge pull request #15103 from hearto1314/cyber_bugfix
lykling Aug 21, 2023
3ecbf30
fix(docker): limit amodel version, and check if pip3 exist (#15113)
daohu527 Aug 29, 2023
930c8a5
feat(canbus): rollback canbus framework
daohu527 Mar 19, 2025
8886291
feat(docker): Optimize GPU support in container startup scripts
mang0825 Mar 21, 2025
b768576
feat(audio): Add Torch version compatibility checks for FFT operation…
mang0825 Mar 21, 2025
ac418d2
build(bazel): Migrate localization_msf to external dependency management
mang0825 Mar 21, 2025
1c32b5a
fix(core/inference): Resolve noexcept conflicts and add TensorRT vers…
mang0825 Mar 21, 2025
f45e5e2
fix(build): Resolve ARM64 architecture image path matching issues
mang0825 Mar 21, 2025
6dd763d
feat(build): Add multi-architecture optimized Bazel build configuration
mang0825 Mar 21, 2025
50deb94
feat(docker): Optimize GPU support in container startup scripts
mang0825 Mar 24, 2025
b2a3391
chore(build): enforce lint rules and add noexcept to leakyReLU_plugin
mang0825 Mar 28, 2025
ff5af0c
chore(deps): update grpc dependency to v1.30.0-apollo release
mang0825 Mar 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
7 changes: 3 additions & 4 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,11 @@ http_archive(
# grpc
http_archive(
name = "com_github_grpc_grpc",
sha256 = "419dba362eaf8f1d36849ceee17c3e2ff8ff12ac666b42d3ff02a164ebe090e9",
sha256 = "2378b608557a4331c6a6a97f89a9257aee2f8e56a095ce6619eea62e288fcfbe",
patches = ["//third_party/absl:grpc.patch"],
strip_prefix = "grpc-1.30.0",
urls = [
"https://apollo-system.cdn.bcebos.com/archive/6.0/v1.30.0.tar.gz",
"https://github.com/grpc/grpc/archive/v1.30.0.tar.gz",
"https://apollo-system.cdn.bcebos.com/archive/8.0/v1.30.0-apollo.tar.gz",
],
)
http_archive(
Expand All @@ -100,4 +99,4 @@ grpc_deps()

load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps")

grpc_extra_deps()
grpc_extra_deps()
31 changes: 28 additions & 3 deletions cyber/message/protobuf_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,28 @@ bool ProtobufFactory::RegisterMessage(const Descriptor& desc) {
}

// Registers the proto file carried by `proto_desc`, after recursively
// registering every file it depends on.
//
// `proto_desc.desc()` holds a serialized FileDescriptorProto. If the first
// message of that file is already known to the pool, nothing is done and true
// is returned. Otherwise each dependency is registered first; when a
// dependency's first message is found in the pool afterwards, the dependency
// path recorded in this file is rewritten to the file name under which that
// message is registered, so the file is built against the registered copy.
//
// Returns false if a descriptor cannot be parsed or any registration fails.
bool ProtobufFactory::RegisterMessage(const ProtoDesc& proto_desc) {
  FileDescriptorProto file_desc_proto;
  // A descriptor that does not parse cannot be registered meaningfully.
  if (!file_desc_proto.ParseFromString(proto_desc.desc())) {
    return false;
  }

  // If the message in this proto file has been registered, return true.
  if (FindMessageTypeByFile(file_desc_proto) != nullptr) {
    return true;
  }

  for (int i = 0; i < proto_desc.dependencies_size(); ++i) {
    // Bind by reference: a ProtoDesc embeds its whole dependency tree.
    const auto& dep = proto_desc.dependencies(i);
    if (!RegisterMessage(dep)) {
      return false;
    }

    FileDescriptorProto dep_file_desc_proto;
    if (!dep_file_desc_proto.ParseFromString(dep.desc())) {
      return false;
    }
    const google::protobuf::Descriptor* descriptor =
        FindMessageTypeByFile(dep_file_desc_proto);

    // If descriptor is found, replace the dependency with registered path.
    // Only rewrite a slot that exists: set_dependency() must not be called
    // with an index outside the file's own dependency list.
    if (descriptor != nullptr && i < file_desc_proto.dependency_size()) {
      file_desc_proto.set_dependency(i, descriptor->file()->name());
    }
  }

  return RegisterMessage(file_desc_proto);
}

Expand Down Expand Up @@ -207,6 +220,18 @@ const google::protobuf::ServiceDescriptor* ProtobufFactory::FindServiceByName(
return pool_->FindServiceByName(name);
}

// Looks up, in the factory's descriptor pool, the first top-level message
// declared in `file_desc_proto`.
//
// The lookup key is the fully-qualified name "<package>.<message>". For a
// file that declares no package the key is just "<message>"; prefixing it
// with "." would produce a name that is not a valid fully-qualified name.
//
// Returns nullptr when the file declares no message types or when the
// message is not present in the pool.
const Descriptor* ProtobufFactory::FindMessageTypeByFile(
    const FileDescriptorProto& file_desc_proto) {
  if (file_desc_proto.message_type_size() == 0) {
    return nullptr;
  }

  const std::string& package = file_desc_proto.package();
  const std::string& message_name = file_desc_proto.message_type(0).name();
  const std::string full_name =
      package.empty() ? message_name : package + "." + message_name;
  return pool_->FindMessageTypeByName(full_name);
}

void ErrorCollector::AddError(const std::string& filename,
const std::string& element_name,
const google::protobuf::Message* descriptor,
Expand Down
4 changes: 4 additions & 0 deletions cyber/message/protobuf_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ class ProtobufFactory {
google::protobuf::Message* GenerateMessageByType(
const std::string& type) const;

// Find a descriptor by FileDescriptorProto. Returns nullptr if not found.
const Descriptor* FindMessageTypeByFile(
const FileDescriptorProto& file_desc_proto);

// Find a top-level message type by name. Returns nullptr if not found.
const Descriptor* FindMessageTypeByName(const std::string& type) const;

Expand Down
1 change: 0 additions & 1 deletion cyber/setup.bash
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ for entry in "${mainboard_path}" \
done

pathprepend ${bazel_bin_path}/cyber/python/internal PYTHONPATH
# todo(zero): The python version determines the path and needs to be optimized
pathprepend "${PYTHON_INSTALL_PATH}/lib/python${PYTHON_VERSION}/site-packages" PYTHONPATH
pathprepend "${PYTHON_INSTALL_PATH}/bin/" PATH

Expand Down
36 changes: 35 additions & 1 deletion docker/scripts/dev_into.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,44 @@
# limitations under the License.
###############################################################################
DOCKER_USER="${USER}"
DEV_CONTAINER="apollo_dev_${USER}"
DEV_CONTAINER_PREFIX='apollo_dev_'
DEV_CONTAINER="${DEV_CONTAINER_PREFIX}${USER}"

# Parse the command-line options of this script.
#   -n | --name <suffix>  target container "${DEV_CONTAINER_PREFIX}<suffix>"
#   --user <user>         user to run as; also exported as CUSTOM_USER
# Unrecognised arguments are ignored. A CUSTOM_USER already present in the
# environment is honoured even when --user is not given.
# Updates the globals DEV_CONTAINER and DOCKER_USER.
# Returns 0 on success, 1 if an option was given without its value (options
# parsed before the faulty one are still applied).
function parse_arguments {
  local container_name=''
  local status=0

  while [ $# -gt 0 ]; do
    local opt="$1"
    shift
    case "${opt}" in
      -n | --name)
        if [ $# -eq 0 ]; then
          echo "Option ${opt} requires a value" >&2
          status=1
          break
        fi
        container_name="$1"
        shift
        ;;

      --user)
        if [ $# -eq 0 ]; then
          echo "Option ${opt} requires a value" >&2
          status=1
          break
        fi
        export CUSTOM_USER="$1"
        shift
        ;;

      *)
        # Unknown arguments are deliberately ignored.
        ;;
    esac
  done

  # Plain if-statements (rather than `[[ ... ]] && ...`) so that an unset
  # override does not become the function's non-zero exit status.
  if [[ -n "${container_name}" ]]; then
    DEV_CONTAINER="${DEV_CONTAINER_PREFIX}${container_name}"
  fi
  if [[ -n "${CUSTOM_USER:-}" ]]; then
    DOCKER_USER="${CUSTOM_USER}"
  fi

  return "${status}"
}

# Start the container named ${DEV_CONTAINER} if it exists in the "exited"
# state; do nothing otherwise.
function restart_stopped_container {
  # docker's name filter matches substrings, so list only the names and
  # require an exact, literal, whole-line match (-F -x). -q keeps the listing
  # off stdout.
  if docker ps -a -f status=exited -f name="${DEV_CONTAINER}" --format '{{.Names}}' \
    | grep -Fxq -- "${DEV_CONTAINER}"; then
    docker start "${DEV_CONTAINER}"
  fi
}

xhost +local:root 1>/dev/null 2>&1

parse_arguments "$@"

restart_stopped_container

docker exec \
-u "${DOCKER_USER}" \
-e HISTFILE=/apollo/.dev_bash_hist \
Expand Down
64 changes: 48 additions & 16 deletions docker/scripts/dev_start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,23 @@ source "${CURR_DIR}/docker_base.sh"
CACHE_ROOT_DIR="${APOLLO_ROOT_DIR}/.cache"

DOCKER_REPO="apolloauto/apollo"
DEV_CONTAINER="apollo_dev_${USER}"
DEV_CONTAINER_PREFIX='apollo_dev_'
DEV_CONTAINER="${DEV_CONTAINER_PREFIX}${USER}"
DEV_INSIDE="in-dev-docker"

SUPPORTED_ARCHS=(x86_64 aarch64)
TARGET_ARCH="$(uname -m)"

VERSION_X86_64="dev-x86_64-18.04-20221124_1708"
TESTING_VERSION_X86_64="dev-x86_64-18.04-testing-20210112_0008"

VERSION_AARCH64="dev-aarch64-18.04-20201218_0030"
VERSION_AARCH64="dev-aarch64-20.04-20231024_1054"
USER_VERSION_OPT=

FAST_MODE="y"
FAST_MODE="n"

GEOLOC=
TIMEZONE_CN=(
"Time zone: Asia/Shanghai (CST, +0800)"
)

USE_LOCAL_IMAGE=0
CUSTOM_DIST=
Expand All @@ -47,16 +49,21 @@ USER_SPECIFIED_MAPS=
MAP_VOLUMES_CONF=

# Install python tools
PYTHON_INSTALL_PATH="/opt/apollo/python_tools"
PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
source docker/setup_host/host_env.sh
DEFAULT_PYTHON_TOOLS=(
amodel
amodel~=0.1.0
)

# Model
MODEL_REPOSITORY="https://apollo-pkg-beta.cdn.bcebos.com/perception_model"
DEFAULT_INSTALL_MODEL=(
"${MODEL_REPOSITORY}/cnnseg128_caffe.zip"
"${MODEL_REPOSITORY}/tl_detection_caffe.zip"
"${MODEL_REPOSITORY}/horizontal_caffe.zip"
"${MODEL_REPOSITORY}/quadrate_caffe.zip"
"${MODEL_REPOSITORY}/vertical_caffe.zip"
"${MODEL_REPOSITORY}/darkSCNN_caffe.zip"
"${MODEL_REPOSITORY}/cnnseg16_caffe.zip"
"${MODEL_REPOSITORY}/3d-r4-half_caffe.zip"
)

# Map
Expand All @@ -65,7 +72,7 @@ DEFAULT_MAPS=(
sunnyvale_loop
sunnyvale_with_two_offices
san_mateo
apollo_virutal_map
#apollo_virutal_map
)

DEFAULT_TEST_MAPS=(
Expand Down Expand Up @@ -206,6 +213,18 @@ function check_target_arch() {
exit 1
}

# Set GEOLOC to "cn" when the host's time zone, as reported by timedatectl,
# equals one of the entries in TIMEZONE_CN. GEOLOC is left untouched when
# there is no match or when timedatectl is not available. Always returns 0.
function check_timezone_cn() {
  # https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
  # Locals, so the helper variables do not leak into the calling script.
  local time_zone=''
  local tz=''

  # Without timedatectl the zone cannot be detected; stay quiet.
  if ! command -v timedatectl > /dev/null 2>&1; then
    return 0
  fi

  # xargs collapses the leading/internal whitespace of the matched line.
  time_zone="$(timedatectl 2> /dev/null | grep "Time zone" | xargs)"

  for tz in "${TIMEZONE_CN[@]}"; do
    if [[ "${time_zone}" == "${tz}" ]]; then
      GEOLOC="cn"
      return 0
    fi
  done
  return 0
}

function setup_devices_and_mount_local_volumes() {
local __retval="$1"

Expand Down Expand Up @@ -332,15 +351,20 @@ function install_python_tools() {

for tool in ${DEFAULT_PYTHON_TOOLS[@]}; do
info "Install python tool ${tool} ..."
pip3 install --user "${tool}"
# Use /usr/bin/pip3 because native python is used in the container.
/usr/bin/pip3 install --user "${tool}"
if [ $? -ne 0 ]; then
error "Failed to install ${tool}"
exit 1
fi
done
}

function install_perception_models() {
if [ "$FAST_MODE" == "n" ] || [ "$FAST_MODE" == "no" ]; then
for model_url in ${DEFAULT_INSTALL_MODEL[@]}; do
info "Install model ${model_url} ..."
amodel install "${model_url}"
amodel install "${model_url}" -s
done
else
warning "Skip the model installation, if you need to run the perception module, you can manually install."
Expand All @@ -358,6 +382,8 @@ function main() {
fi

determine_dev_image "${USER_VERSION_OPT}"

[[ -z "${GEOLOC}" ]] && check_timezone_cn
geo_specific_config "${GEOLOC}"

if [[ "${USE_LOCAL_IMAGE}" -gt 0 ]]; then
Expand All @@ -381,11 +407,17 @@ function main() {

mount_map_volumes

info "Installing python tools ..."
install_python_tools
if ! [ -x "$(command -v pip3)" ]; then
warning "Skip install perception models!!! " \
"Need pip3 to install Apollo model management tool!" \
"Try \"sudo apt install python3-pip\" "
else
info "Installing python tools ..."
install_python_tools

info "Installing perception models ..."
install_perception_models
info "Installing perception models ..."
install_perception_models
fi

info "Starting Docker container \"${DEV_CONTAINER}\" ..."

Expand Down
8 changes: 6 additions & 2 deletions docker/scripts/docker_base.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,15 @@ function determine_gpu_use_host() {

local nv_docker_doc="https://github.com/NVIDIA/nvidia-docker/blob/master/README.md"
if [[ "${USE_GPU_HOST}" -eq 1 ]]; then
if [[ -x "$(which nvidia-container-toolkit)" ]]; then
if [[ -x "$(which nvidia-container-toolkit)" || -x "$(which nvidia-container-runtime)" ]]; then
local docker_version
docker_version="$(docker version --format '{{.Server.Version}}')"
if dpkg --compare-versions "${docker_version}" "ge" "19.03"; then
DOCKER_RUN_CMD="docker run --gpus all"
if [[ "${HOST_ARCH}" == "aarch64" ]]; then
DOCKER_RUN_CMD="docker run --runtime nvidia"
else
DOCKER_RUN_CMD="docker run --gpus all"
fi
else
warning "Please upgrade to docker-ce 19.03+ to access GPU from container."
USE_GPU_HOST=0
Expand Down
4 changes: 2 additions & 2 deletions docker/setup_host/host_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################

APOLLO_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"

PYTHON_INSTALL_PATH="/opt/apollo/python_tools"
PYTHON_USER_BASE=$(python3 -c 'import site; print(site.USER_BASE)')
PYTHON_INSTALL_PATH="${PYTHON_USER_BASE}/apollo/tools"
PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')

source ${APOLLO_ROOT_DIR}/scripts/common.bashrc
Expand Down
17 changes: 17 additions & 0 deletions modules/audio/inference/direction_detection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,30 @@ double DirectionDetection::GccPhat(const torch::Tensor& sig,
n = n_sig + n_refsig;
torch::Tensor psig = at::constant_pad_nd(sig, {0, n_refsig}, 0);
torch::Tensor prefsig = at::constant_pad_nd(refsig, {0, n_sig}, 0);
#if TORCH_VERSION_MINOR <= 7
psig = at::rfft(psig, 1, false, true);
prefsig = at::rfft(prefsig, 1, false, true);
#else
auto psig_complex = at::fft_rfft(psig, c10::nullopt, -1, c10::nullopt);
psig = at::stack({torch::real(psig_complex), torch::imag(psig_complex)}, -1);

auto prefsig_complex = at::fft_rfft(prefsig, c10::nullopt, -1, c10::nullopt);
prefsig = at::stack(
{torch::real(prefsig_complex), torch::imag(prefsig_complex)}, -1);
#endif

ConjugateTensor(&prefsig);
torch::Tensor r = ComplexMultiply(psig, prefsig);
#if TORCH_VERSION_MINOR <= 7
torch::Tensor cc =
at::irfft(r / ComplexAbsolute(r), 1, false, true, {interp * n});
#else
auto irfft_input_transpose = at::transpose(r / ComplexAbsolute(r), 0, 1);
auto irfft_complex =
torch::complex(irfft_input_transpose[0], irfft_input_transpose[1]);
torch::Tensor cc =
torch::real(torch::fft::irfft(irfft_complex, n, -1, c10::nullopt));
#endif
int max_shift = static_cast<int>(interp * n / 2);
if (max_tau != 0)
max_shift = std::min(static_cast<int>(interp * fs * max_tau), max_shift);
Expand Down
11 changes: 8 additions & 3 deletions modules/bridge/common/bridge_header.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,19 @@ bool BridgeHeader::Diserialize(const char *buf, size_t buf_size) {
i -= static_cast<int>(sizeof(HType) + sizeof(bsize) + size + 3);
continue;
}
size_t value_size = 0;

for (int j = 0; j < Header_Tail; j++) {
if (type == header_item[j]->GetType()) {
cursor = header_item[j]->DiserializeItem(cursor, &value_size);
size_t value_size = 0;
cursor = header_item[j]->DiserializeItem(cursor, static_cast<size_t>(i),
&value_size);
i -= static_cast<int>(value_size);
if (cursor == nullptr) {
return false;
}
break;
}
}
i -= static_cast<int>(value_size);
}
return true;
}
Expand Down
Loading