Skip to content

Commit ab35e3e

Browse files
committed
[Refactor] AlexNet
1 parent 33d54ea commit ab35e3e

File tree

7 files changed

+539
-388
lines changed

7 files changed

+539
-388
lines changed

alexnet/CMakeLists.txt

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,45 @@
1-
cmake_minimum_required(VERSION 2.6)
1+
cmake_minimum_required(VERSION 3.14)
22

3-
project(alexnet)
3+
project(
4+
alexnet
5+
VERSION 0.1
6+
LANGUAGES C CXX CUDA)
47

5-
add_definitions(-std=c++11)
8+
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
9+
set(CMAKE_CUDA_ARCHITECTURES
10+
60
11+
70
12+
72
13+
75
14+
80
15+
86
16+
89)
17+
endif()
618

7-
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
8-
set(CMAKE_CXX_STANDARD 11)
9-
set(CMAKE_BUILD_TYPE Debug)
19+
set(CMAKE_CXX_STANDARD 17)
20+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
21+
set(CMAKE_CUDA_STANDARD 17)
22+
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
23+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
24+
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
25+
set(CMAKE_BUILD_TYPE
26+
"Debug"
27+
CACHE STRING "Build type for this project" FORCE)
1028

11-
include_directories(${PROJECT_SOURCE_DIR}/include)
12-
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
13-
# cuda
14-
include_directories(/usr/local/cuda/include)
15-
link_directories(/usr/local/cuda/lib64)
16-
# tensorrt
17-
include_directories(/usr/include/x86_64-linux-gnu/)
18-
link_directories(/usr/lib/x86_64-linux-gnu/)
29+
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cudaruntime library" OFF)
1930

20-
add_executable(alexnet ${PROJECT_SOURCE_DIR}/alex.cpp)
21-
target_link_libraries(alexnet nvinfer)
22-
target_link_libraries(alexnet cudart)
31+
find_package(Threads REQUIRED)
32+
find_package(CUDAToolkit REQUIRED)
2333

24-
add_definitions(-O2 -pthread)
34+
if(NOT TARGET TensorRT::TensorRT)
35+
include(FindTensorRT.cmake)
36+
else()
37+
message("TensorRT has been found, skipping for ${PROJECT_NAME}")
38+
endif()
2539

40+
add_executable(${PROJECT_NAME} alex.cpp)
41+
42+
target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR}/include)
43+
44+
target_link_libraries(${PROJECT_NAME} PUBLIC Threads::Threads m
45+
TensorRT::TensorRT CUDA::cudart)

alexnet/FindTensorRT.cmake

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
# Locates TensorRT headers and libraries and exposes them as the
# imported target TensorRT::TensorRT. Requires the TRT_VERSION
# environment variable (read at configure time) to pick the module set.
cmake_minimum_required(VERSION 3.18.0)

set(TRT_VERSION
    $ENV{TRT_VERSION}
    CACHE STRING
          "TensorRT version, e.g. \"8.6.1.6\" or \"8.6.1.6+cuda12.0.1.011\"")

# find TensorRT include folder
if(NOT TensorRT_INCLUDE_DIR)
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    set(TensorRT_INCLUDE_DIR
        "/usr/local/cuda/targets/aarch64-linux/include"
        CACHE PATH "TensorRT_INCLUDE_DIR")
  else()
    set(TensorRT_INCLUDE_DIR
        "/usr/include/x86_64-linux-gnu"
        CACHE PATH "TensorRT_INCLUDE_DIR")
  endif()
  message(STATUS "TensorRT include dir: ${TensorRT_INCLUDE_DIR}")
endif()

# find TensorRT library folder
if(NOT TensorRT_LIBRARY_DIR)
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    set(TensorRT_LIBRARY_DIR
        "/usr/lib/aarch64-linux-gnu/tegra"
        CACHE PATH "TensorRT_LIBRARY_DIR")
  else()
    # Fixed: libraries live under /usr/lib/..., not /usr/include/...
    set(TensorRT_LIBRARY_DIR
        "/usr/lib/x86_64-linux-gnu"
        CACHE PATH "TensorRT_LIBRARY_DIR")
  endif()
  message(STATUS "TensorRT library dir: ${TensorRT_LIBRARY_DIR}")
endif()

set(TensorRT_LIBRARIES)

# pick the library module list for the detected TensorRT major version
if(DEFINED TRT_VERSION AND NOT TRT_VERSION STREQUAL "")
  # quoted so an empty or ;-containing value cannot break the call
  string(REGEX MATCH "([0-9]+)" _match "${TRT_VERSION}")
  set(TRT_MAJOR_VERSION "${_match}")
  set(_modules nvinfer nvinfer_plugin)

  if(TRT_MAJOR_VERSION GREATER_EQUAL 8)
    list(APPEND _modules nvinfer_vc_plugin nvinfer_dispatch nvinfer_lean)
  endif()
else()
  message(FATAL_ERROR "Please set an environment variable \"TRT_VERSION\"")
endif()

# find all modules of TensorRT and collect them into the list;
# fail loudly instead of silently appending a -NOTFOUND entry
foreach(lib IN LISTS _modules)
  find_library(
    TensorRT_${lib}_LIBRARY
    NAMES ${lib}
    HINTS ${TensorRT_LIBRARY_DIR})
  if(NOT TensorRT_${lib}_LIBRARY)
    message(FATAL_ERROR
            "TensorRT module \"${lib}\" not found in ${TensorRT_LIBRARY_DIR}")
  endif()
  list(APPEND TensorRT_LIBRARIES ${TensorRT_${lib}_LIBRARY})
endforeach()

# report AFTER the search so the message shows the actual results
# (previously this printed an always-empty list before the loop ran)
message(STATUS "Found TensorRT libs: ${TensorRT_LIBRARIES}")

# make the "TensorRT" target; GLOBAL so the ALIAS below is legal for an
# imported target (requires CMake >= 3.18)
add_library(TensorRT INTERFACE IMPORTED GLOBAL)
add_library(TensorRT::TensorRT ALIAS TensorRT)
target_link_libraries(TensorRT INTERFACE ${TensorRT_LIBRARIES})

set_target_properties(
  TensorRT
  PROPERTIES C_STANDARD 17
             CXX_STANDARD 17
             POSITION_INDEPENDENT_CODE ON
             SKIP_BUILD_RPATH TRUE
             BUILD_WITH_INSTALL_RPATH TRUE
             # $ORIGIN must reach the linker literally; the previous
             # "$\{ORIGIN\}" embedded backslashes in the rpath
             INSTALL_RPATH "$ORIGIN"
             INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIR}")

unset(TRT_MAJOR_VERSION)
unset(_modules)

alexnet/README.md

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,67 @@
1-
# alexnet
1+
# AlexNet
22

3-
AlexNet model architecture from the "One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
3+
## Introduction
44

5-
For the details, you can refer to [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet)
5+
AlexNet model architecture comes from this paper: [One weird trick for parallelizing convolutional neural networks](https://arxiv.org/abs/1404.5997). To generate `.wts` file, you can refer to [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet). To check the pytorch implementation of AlexNet, refer to [HERE](https://github.com/pytorch/vision/blob/main/torchvision/models/alexnet.py#L17)
66

7-
This alexnet is just several `conv-relu-pool` blocks followed by several `fc-relu`, nothing special. All layers can be implemented by tensorrt api, including `addConvolution`, `addActivation`, `addPooling`, `addFullyConnected`.
7+
AlexNet consists of 3 major parts: features, adaptive average pooling, and classifier:
8+
* features: just several stacked `CRP`(conv-relu-pool) and `CR` layers
9+
* adaptive average pooling: PyTorch derives its internal parameters automatically, but with the TensorRT API we need to calculate them ourselves
10+
* classifier: just several `fc-relu` layers. All layers can be implemented by tensorrt api, including `addConvolution`, `addActivation`, `addPooling`, `addMatrixMultiply`, `addElementWise` etc.
811

9-
```
10-
// 1. generate alexnet.wts from [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet)
11-
12-
// 2. put alexnet.wts into tensorrtx/alexnet
13-
14-
// 3. build and run
15-
16-
cd tensorrtx/alexnet
12+
## Use AlexNet from PyTorch
1713

18-
mkdir build
14+
We can use torchvision to load the pretrained alexnet model:
1915

20-
cd build
16+
```python
17+
alexnet = torchvision.models.alexnet(pretrained=True)
18+
```
2119

22-
cmake ..
20+
The model structure is:
21+
22+
```txt
23+
AlexNet(
24+
(features): Sequential(
25+
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
26+
(1): ReLU(inplace=True)
27+
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
28+
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
29+
(4): ReLU(inplace=True)
30+
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
31+
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
32+
(7): ReLU(inplace=True)
33+
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
34+
(9): ReLU(inplace=True)
35+
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
36+
(11): ReLU(inplace=True)
37+
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
38+
)
39+
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
40+
(classifier): Sequential(
41+
(0): Dropout(p=0.5, inplace=False)
42+
(1): Linear(in_features=9216, out_features=4096, bias=True)
43+
(2): ReLU(inplace=True)
44+
(3): Dropout(p=0.5, inplace=False)
45+
(4): Linear(in_features=4096, out_features=4096, bias=True)
46+
(5): ReLU(inplace=True)
47+
(6): Linear(in_features=4096, out_features=1000, bias=True)
48+
)
49+
)
50+
```
2351

24-
make
52+
## FAQ
2553

26-
sudo ./alexnet -s // serialize model to plan file i.e. 'alexnet.engine'
54+
### How to align the output with Pytorch?
2755

28-
sudo ./alexnet -d // deserialize plan file and run inference
56+
If your output differs from PyTorch's, you have to find out which TensorRT API call or part of your code causes this. A simple approach is to check the `.engine` output part by part; e.g., you can set an early layer of AlexNet as the output:
2957

30-
// 4. see if the output is same as pytorchx/alexnet
58+
```c++
59+
fc3_1->getOutput(0)->setName(OUTPUT_NAME);
60+
network->markOutput(*pool3->getOutput(0)); // original is: "*fc3_1->getOutput(0)"
3161
```
3262
63+
With this line of code, I use the output from the "features" part of AlexNet and ignore the rest of the model. Then don't forget to change the `OUTPUT_SIZE` macro at the top of the file; lastly, rebuild the `.engine` file to apply the changes.
64+
65+
You can sum up all outputs in the C++ code and compare the result with the PyTorch output; in PyTorch you can do this with `torch.sum(x)` while debugging. The acceptable deviation between the two sums would be around $[10^{-2}, 10^{-1}]$; for this example, since the "features" output has $256 \times 6 \times 6$ elements (batch = 1), the per-element error would roughly be $10^{-4}$.
3366
67+
Note: this is only a quick check; for a more accurate comparison you have to save the output tensors to a file and compare them value by value, but that is rarely necessary.

0 commit comments

Comments
 (0)