
Commit 1f40b92

srkreddy1238 and echuraev authored Jan 24, 2023
[TOOL][NATIVE] Android native application for deploy and run (#13791)
This application serves as a reference for verifying and integrating TVM compiled models on Android targets natively, independent of any RPC setup. tvmc is used for compiling, tuning, and for a test run before deployment. This PR also covers:

* Enabling clml for the tvmc compilation tool.
* Graph runtime API "get_output_info" to return the output tensor specification, similar to "get_input_info".
* Adding and enabling the 3rdparty dependency "cnpy" to deal with npz files.

Co-authored-by: Egor Churaev <[email protected]>
1 parent 1d89071 commit 1f40b92

File tree

19 files changed: +1248 −3 lines
 

‎.gitmodules

Lines changed: 3 additions & 0 deletions

@@ -19,3 +19,6 @@
 [submodule "3rdparty/OpenCL-Headers"]
 	path = 3rdparty/OpenCL-Headers
 	url = https://github.com/KhronosGroup/OpenCL-Headers.git
+[submodule "3rdparty/cnpy"]
+	path = 3rdparty/cnpy
+	url = https://github.com/rogersce/cnpy.git

‎3rdparty/cnpy

Submodule cnpy added at 4e8810b

‎CMakeLists.txt

Lines changed: 4 additions & 0 deletions

@@ -593,6 +593,10 @@ if(USE_CPP_RPC)
   add_subdirectory("apps/cpp_rpc")
 endif()
 
+if(USE_CPP_RTVM)
+  add_subdirectory("apps/cpp_rtvm")
+endif()
+
 if(USE_IOS_RPC)
   add_subdirectory("apps/ios_rpc")
 endif()

‎LICENSE

Lines changed: 1 addition & 0 deletions

@@ -234,6 +234,7 @@ MIT License
 3rdparty/libcrc
 3rdparty/cma
 3rdparty/compiler-rt/builtin_fp16.h
+3rdparty/cnpy
 
 The Unlicense
 -------------

‎apps/cpp_rtvm/CMakeLists.txt

Lines changed: 98 additions & 0 deletions

cmake_policy(SET CMP0069 NEW) # suppress cmake warning about IPO

set(RTVM_SOURCES
  main.cc
  tvm_runner.cc
  ../../3rdparty/cnpy/cnpy.cpp
)
set(TVM_RUNNER_SOURCES
  tvm_runner.cc
  ../../3rdparty/cnpy/cnpy.cpp
)

set(RTVM_LINKER_LIBS "")

if(WIN32)
  list(APPEND RTVM_SOURCES win32_process.cc)
  list(APPEND TVM_RUNNER_SOURCES win32_process.cc)
endif()

# Set output to same directory as the other TVM libs
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
add_executable(rtvm ${RTVM_SOURCES})
add_library(tvm_runner_objs OBJECT ${TVM_RUNNER_SOURCES})
add_library(tvm_runner SHARED $<TARGET_OBJECTS:tvm_runner_objs>)

include(CheckIPOSupported)
check_ipo_supported(RESULT result OUTPUT output)
if(result)
  set_property(TARGET rtvm PROPERTY INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE)
endif()

if(WIN32)
  target_compile_definitions(rtvm PUBLIC -DNOMINMAX)
endif()

if (OS)
  if (OS STREQUAL "Linux")
    set_property(TARGET rtvm PROPERTY LINK_FLAGS -lpthread)
    set_property(TARGET tvm_runner PROPERTY LINK_FLAGS -lpthread)
  endif()
endif()

if(USE_OPENCL)
  if (ANDROID_ABI)
    if(DEFINED ENV{ANDROID_NDK_MAJOR})
      if($ENV{ANDROID_NDK_MAJOR} VERSION_LESS "23")
        set_property(TARGET rtvm PROPERTY LINK_FLAGS -fuse-ld=gold)
        set_property(TARGET tvm_runner PROPERTY LINK_FLAGS -fuse-ld=gold)
      endif()
    endif()
  endif()
endif()

target_include_directories(
  rtvm
  PUBLIC "../../include"
  PUBLIC "../../3rdparty/cnpy"
  PUBLIC DLPACK_PATH
  PUBLIC DMLC_PATH
)

if (BUILD_FOR_ANDROID AND USE_HEXAGON)
  get_hexagon_sdk_property("${USE_HEXAGON_SDK}" "${USE_HEXAGON_ARCH}"
    DSPRPC_LIB DSPRPC_LIB_DIRS
  )
  if(DSPRPC_LIB_DIRS)
    link_directories(${DSPRPC_LIB_DIRS})
  else()
    message(WARNING "Could not locate some Hexagon SDK components")
  endif()
  list(APPEND RTVM_LINKER_LIBS cdsprpc log)
endif()

if(USE_ETHOSN)
  if (ETHOSN_RUNTIME_LIBRARY)
    list(APPEND RTVM_LINKER_LIBS ${ETHOSN_RUNTIME_LIBRARY})
  else()
    message(WARNING "Could not locate Arm(R) Ethos(TM)-N runtime library components")
  endif()
endif()

if(BUILD_STATIC_RUNTIME)
  list(APPEND RTVM_LINKER_LIBS -Wl,--whole-archive tvm_runtime -Wl,--no-whole-archive z)
else()
  list(APPEND RTVM_LINKER_LIBS tvm_runtime z)
endif()

target_link_libraries(rtvm ${RTVM_LINKER_LIBS})

# Build tvm_runner as an exportable lib
target_include_directories(
  tvm_runner_objs
  PUBLIC "../../include"
  PUBLIC "../../3rdparty/cnpy"
  PUBLIC DLPACK_PATH
  PUBLIC DMLC_PATH
)
target_link_libraries(tvm_runner ${RTVM_LINKER_LIBS})

‎apps/cpp_rtvm/README.md

Lines changed: 354 additions & 0 deletions

<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->

<!--- http://www.apache.org/licenses/LICENSE-2.0 -->

<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->


# Native Inference Application for CPP Native

The native inference tool ```rtvm``` helps in deploying TVM compiled models from a standalone C++ environment.
The overall process starts from getting a model from a framework and goes all the way up to running it on a target device using the `rtvm` tool.

### Models

Models can be obtained from well known frameworks like TensorFlow, PyTorch, TFLite, ONNX, etc.
scripts/download_models.py is a reference script that prepares the sample network ```resnet50``` from the Keras framework.

```bash
python3 scripts/download_models.py
```

### Auto Tuning

The auto tuning process tunes the various operators in the given model for the respective target. Auto tuning for remote devices uses ```tvm_rpc```, so we need to set up the RPC environment before we invoke tuning.
Please refer to the section [RPC setup](#rpc-setup) below for the same.

Auto tuning is necessary to obtain the best performing kernels. We can skip this step if a tuning log is already available or if the tuning cache is available from tophub (implicit in the TVM compilation process).
The message below indicates that some kernels are not optimized for the selected target; in this case we can proceed with tuning to get the best performance.

```One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.```

With the environment below from [RPC setup](#rpc-setup)

``` bash
tvm tracker running on ```TVM_TRACKER_HOST```
tracker port being ```TVM_TRACKER_PORT```
rpc device access key being ```TVM_RPC_KEY```
the model to be tuned being ```./model_data/keras-resnet50/resnet50.h5```
```

the command below generates the tuning cache to the file ```./model_data/keras-resnet50/keras-resnet50.log```

```bash
python3 -m tvm.driver.tvmc tune --target="opencl" --target-host="llvm -mtriple=aarch64-linux-gnu" \
./model_data/keras-resnet50/resnet50.h5 -o ./model_data/keras-resnet50/keras-resnet50.log \
--early-stopping 0 --repeat 30 --rpc-key ${TVM_RPC_KEY} --rpc-tracker ${TVM_TRACKER_HOST}:${TVM_TRACKER_PORT} --trials 1024 \
--tuning-records ./model_data/keras-resnet50/keras-resnet50-records.log --tuner xgb
```

where
```bash
--target="opencl" refers to the opencl device on the Android device
--target-host="llvm -mtriple=aarch64-linux-gnu" refers to target_host being an ARM64 CPU
Options --early-stopping, --repeat, --trials, --tuner are AutoTVM specific options.
```
Please refer to the AutoTVM documentation for more details [here](https://tvm.apache.org/docs/how_to/tune_with_autotvm/index.html?highlight=autotvm).

### Compile the model

The compilation step generates the TVM compiler output artifacts, which need to be taken to the target device for deployment.
These artifacts form a compressed archive with the kernel shared lib, a json graph description and the params binary.

The command below generates the same:

```bash
python3 -m tvm.driver.tvmc compile --cross-compiler ${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang \
--target="opencl, llvm" --target-llvm-mtriple aarch64-linux-gnu -o keras-resnet50.tar ./model_data/keras-resnet50/resnet50.h5
```

where
```
--cross-compiler : Indicates the cross compiler path for kernel library generation
--target="opencl, llvm" indicates the target and host devices
```
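
Before moving on, we can sanity-check the generated archive on the host. A quick sketch (the member names listed are an assumption based on the artifact layout the ```rtvm``` tool expects, described under Deployment Run below):

```bash
# List the artifacts inside the tvmc output archive:
# kernel shared lib, graph json and params binary (names assumed)
tar -tf keras-resnet50.tar
# mod.so
# mod.json
# mod.params
```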

### Test Run via RPC

At this stage we can verify the generated compiler output for execution correctness over the RPC interface.
The command below runs the compiled output on the remote target device,

with

``` bash
tvm tracker running on ```TVM_TRACKER_HOST```
tracker port being ```TVM_TRACKER_PORT```
rpc device access key being ```TVM_RPC_KEY```
compilation output being keras-resnet50.tar
```

```bash
python3 -m tvm.driver.tvmc run --device="cl" keras-resnet50.tar --rpc-key ${TVM_RPC_KEY} --rpc-tracker ${TVM_TRACKER_HOST}:${TVM_TRACKER_PORT} --print-time
```

This feeds random inputs to the model and validates the execution correctness of the compiled output.

The ```tvmc``` tool has various options to input custom data, profile the model and benchmark the execution.


### Deployment Run

Now we can verify the deployment run of the compiled model using the ```rtvm``` tool on the target device, without any RPC or host based execution.

We need to extract the tar archive on the target device. We can copy the extracted contents of ```keras-resnet50.tar``` under the Android temp folder at ```/data/local/tmp/keras-resnet50/```.

Also copy the cross compiled tool ```rtvm``` and ```libtvm_runtime.so``` to ```/data/local/tmp/```, as sketched below.
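
As an illustration, assuming ```adb``` access to the device from the host, the copy steps could look like:

```bash
# Extract the compiled archive on the host and push the contents to the device
mkdir -p keras-resnet50 && tar -xf keras-resnet50.tar -C keras-resnet50
adb push keras-resnet50 /data/local/tmp/keras-resnet50

# Push the cross compiled tool and the runtime library
adb push rtvm /data/local/tmp/
adb push libtvm_runtime.so /data/local/tmp/
```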

```rtvm``` usage can be queried as below:
```bash
Android:/data/local/tmp $ LD_LIBRARY_PATH=./ ./rtvm
Command line usage
--model        - The folder containing tvm artifacts (mod.so, mod.params, mod.json)
--device       - The target device to use {llvm, opencl, cpu, cuda, metal, rocm, vpi, oneapi}
--input        - Numpy file for the model input (optional; random input is used if not given)
--output       - Numpy file name to dump the model output as numpy
--dump-meta    - Dump model meta information

  Example
  ./rtvm --model=keras-resnet50 --device="opencl" --dump-meta
  ./rtvm --model=keras-resnet50 --device="opencl" --input input.npz --output=output.npz
```

```rtvm``` can run the model with no inputs (just a dry run without any valid inputs) and also with a specific input supplied as a numpy npz format file.

We can create an npz dump for all inputs by saving the dict object as shown below.

With ```keras-resnet50``` having one input ```input_1``` with shape ```[1, 224, 224, 3]``` and dtype ```float32```:

```
# Random initialization
input1 = np.random.uniform(low=-1, high=1, size=(1, 224, 224, 3)).astype("float32")
dataset = {"input_1": input1}
np.savez("input.npz", **dataset)
```

Copy ```input.npz``` to the target device as well, as ```/data/local/tmp/input.npz```.


Now, on the Android shell we can do a dry run as well as a run with a specific input, as shown below.
```bash
# Query meta data information
Android:/data/local/tmp/ $ LD_LIBRARY_PATH=./ ./rtvm --model=keras-resnet50 --device=opencl --dump-meta
. . . . . .
Meta Information:keras-resnet50
    Number of Inputs:183
    Number of Outputs:1
    Input MetaInfo:
        Input:input_1
            DType:float32
            Shape:[1, 224, 224, 3]
    Output MetaInfo:
        Output:tvmgen_default_fused_nn_softmax
            DType:float32
            Shape:[1, 1000]
. . . . . .

# Dry run without any inputs
Android:/data/local/tmp/ $ LD_LIBRARY_PATH=./ ./rtvm --model=keras-resnet50 --device=opencl
Model         = keras-resnet50
Device        = opencl
Input         =
Output        =
Dump Metadata = False
TVMRunner Constructor:keras-resnet50 Devices:opencl
TVMRunner Load:keras-resnet50
TVMRunner::GetMetaInfo
Executing dry run ...
Set Random Input for :input_1
TVMRunner::GetInputMemSize:input_1
Random Input Size:602112 bytes
TVMRunner::SetInput (Raw)
TVMRunner::Run
Get Output for :tvmgen_default_fused_nn_softmax
TVMRunner::GetOutputMemSize:tvmgen_default_fused_nn_softmax
TVMRunner::GetOutput (Raw)
Output Size:4000 bytes


# Run with input and dump output as npz file
Android:/data/local/tmp/ $ LD_LIBRARY_PATH=./ ./rtvm --model=keras-resnet50 --device=opencl --input=input.npz --output=output.npz
Model         = keras-resnet50
Device        = opencl
Input         = input.npz
Output        = output.npz
Dump Metadata = False
TVMRunner Constructor:keras-resnet50 Devices:opencl
TVMRunner Load:keras-resnet50
TVMRunner::GetMetaInfo
Executing with Input:input.npz Output:output.npz
TVMRunner::SetInput (Numpy):input.npz
Set Numpy Input for :input_1
TVMRunner::Run
TVMRunner::GetOutput (Numpy):output.npz
Get Output for :tvmgen_default_fused_nn_softmax
Output Size:4000 bytes
```

output.npz contains the model outputs. Below is a quick look at its contents.
```bash
tvm-host:~$ unzip -l output.npz
Archive:  output.npz
  Length      Date    Time    Name
---------  ---------- -----   ----
     4080  1980-00-00 00:00   tvmgen_default_fused_nn_softmax.npy
---------                     -------
     4080                     1 file
```
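
Back on the host, the dumped outputs can be loaded with numpy for any post-processing. A small sketch (the output tensor name matches the meta information dumped above):

```python
import numpy as np

# Keys in the npz dump are the model output names
outputs = np.load("output.npz")
softmax = outputs["tvmgen_default_fused_nn_softmax"]  # shape (1, 1000), dtype float32
print("Top-1 class index:", int(softmax.argmax()))
```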

Building ```cpp_rtvm``` also produces ```libtvm_runner.so```, a simplified interface that rtvm uses internally for loading and executing tvm compiled models from C/C++ environments.
```tvm_runner.h``` describes this interface definition. Alternatively, advanced users can use TVM's [c_native_api](https://github.com/apache/tvm/blob/main/include/tvm/runtime/c_runtime_api.h) interface for more access to TVM features.
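
For reference, a minimal (hypothetical) C++ client of this interface could look like the sketch below, using only the methods declared in ```tvm_runner.h```:

```cpp
#include "tvm_runner.h"

int main() {
  // Load the compiled artifacts from ./keras-resnet50 and target the OpenCL device
  tvm::runtime::TVMRunner runner("keras-resnet50", "opencl");
  runner.Load();
  runner.GetMetaInfo();            // query input/output shapes and dtypes
  runner.SetInput("input.npz");    // set all model inputs from an npz file
  runner.Run();                    // one inference cycle
  runner.GetOutput("output.npz");  // dump all model outputs as an npz file
  return 0;
}
```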

# RPC Setup

For Android devices we require cross compilation of tvm_rpc (and of libtvm_runtime.so, which is a dependency) for the remote device.
The RPC setup involves running a tracker on the host device and running tvm_rpc on the target device.

### Tracker

The command below runs the tracker on the host over port ```9100```.

```bash
python3 -m tvm.exec.rpc_tracker --host 127.0.0.1 --port 9100
```

### RPC on Target

With ```abcd1234ef``` being the adb device id, and with tvm_rpc (and libtvm_runtime.so) pushed to the target device at ```/data/local/tmp/tvm_rpc/```:

```bash
export ANDROID_SERIAL=abcd1234ef
# The settings below reroute tcp connections on the device to the host via the adb interface
adb reverse tcp:9100 tcp:9100
adb forward tcp:5000 tcp:5000
# Run tvm_rpc on the device
env adb shell "cd /data/local/tmp/tvm_rpc; killall -9 tvm_rpc; \
LD_LIBRARY_PATH=/data/local/tmp/tvm_rpc/ ./tvm_rpc server --host=0.0.0.0 --port=5000 --port-end=5010 --tracker=127.0.0.1:9100 --key=android"
```

Now we have the RPC setup ready, with ```TVM_TRACKER_HOST=127.0.0.1```, ```TVM_TRACKER_PORT=9100``` and ```TVM_RPC_KEY=android```.

We can also check the connected and available devices on the tracker, as shown below.

```bash
python3 -m tvm.exec.query_rpc_tracker --port ${TVM_TRACKER_PORT}
Tracker address 127.0.0.1:9100

Server List
------------------------------
server-address           key
------------------------------
127.0.0.1:5000    server:android
------------------------------

Queue Status
-------------------------------
key       total  free  pending
-------------------------------
android   1      1     0
-------------------------------
```


# Target Specific Configuration

The sections below describe the device/target specific settings to be used with the ```tvmc``` tool.

### Adreno GPU

Adreno GPU has a docker definition that helps to ease the development environment setup.

We can build the docker image by using the commands below from the TVM repo.

```bash
./docker/build.sh ci_adreno
docker tag tvm.ci_adreno ci_adreno
```

The command below builds the host and target RPC components for Adreno and drops into an interactive shell.

```bash
./tests/scripts/ci.py adreno -i
```

Also, one can build with Adreno OpenCLML SDK support:

```bash
export ADRENO_OPENCL=<Path to OpenCLML SDK>
./tests/scripts/ci.py adreno -i
```

The commands above produce
```build-adreno```, which is the host build, and
```build-adreno-target```, which contains the cross compiled tvm_rpc and libtvm_runtime.so.


The options below are to be used for Adreno GPU while working with tvmc.

* Tuning

```
--target="opencl -device=adreno"
--target-host="llvm -mtriple=aarch64-linux-gnu"
```

* Compilation

```
--cross-compiler ${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang
--target="opencl, llvm"
--target-opencl-device adreno
--target-llvm-mtriple aarch64-linux-gnu
```

While enabling CLML, we just need to specify the additional target option below for compilation:
```--target="opencl, clml, llvm"```

* Running

```--device="cl"```


For example, with a model from Keras at ```./model_data/keras-resnet50/resnet50.h5```:

```bash
# Tuning
python3 -m tvm.driver.tvmc tune --desired-layout NCHW --target="opencl -device=adreno" --target-host="llvm -mtriple=aarch64-linux-gnu" \
./model_data/keras-resnet50/resnet50.h5 -o ./model_data/keras-resnet50/keras-resnet50.log --early-stopping 0 --repeat 30 \
--rpc-key ${TVM_RPC_KEY} --rpc-tracker ${TVM_TRACKER_HOST}:${TVM_TRACKER_PORT} --trials 1024 --tuning-records ./model_data/keras-resnet50/keras-resnet50-records.log --tuner xgb

# Tuning produces the tuning log ./model_data/keras-resnet50/keras-resnet50.log


# Compilation
python3 -m tvm.driver.tvmc compile --cross-compiler ${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang \
--desired-layout NCHW --target="opencl, llvm" --target-opencl-device adreno --target-llvm-mtriple aarch64-linux-gnu \
./model_data/keras-resnet50/resnet50.h5 -o keras-resnet50.tar

# Compilation produces the target artifacts keras-resnet50.tar

# Run on adreno device via RPC
python3 -m tvm.driver.tvmc run --device="cl" keras-resnet50.tar --rpc-key ${TVM_RPC_KEY} --rpc-tracker ${TVM_TRACKER_HOST}:${TVM_TRACKER_PORT} --print-time
```

‎apps/cpp_rtvm/main.cc

Lines changed: 264 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file main.cc
 * \brief Native runtime utility for TVM compiled models.
 */
#include <csignal>
#include <cstdio>
#include <cstdlib>
#if defined(__linux__) || defined(__ANDROID__)
#include <unistd.h>
#endif
#include <dmlc/logging.h>

#include <cstring>
#include <iostream>
#include <sstream>
#include <vector>

#include "../../src/support/socket.h"
#include "../../src/support/utils.h"
#include "tvm_runner.h"

#if defined(_WIN32)
#include "win32_process.h"
#endif

using namespace std;
using namespace tvm::runtime;
using namespace tvm::support;

static const string kUsage =
    "Command line usage\n"
    "--model        - The folder containing tvm artifacts (mod.so, mod.params, mod.json)\n"
    "--device       - The target device to use {llvm, opencl, cpu, cuda, metal, rocm, vpi, "
    "oneapi}\n"
    "--input        - Numpy file for the model input (optional; random input is used if not given)\n"
    "--output       - Numpy file name to dump the model output as numpy\n"
    "--dump-meta    - Dump model meta information\n"
    "\n"
    "  Example\n"
    "  ./rtvm --model=keras-resnet50 --device=\"opencl\" --dump-meta\n"
    "  ./rtvm --model=keras-resnet50 --device=\"opencl\" --input input.npz --output=output.npz\n"
    "\n";

/*!
 * \brief Tool Arguments.
 * \arg model The tvm artifact to load & run
 * \arg device The target device to use {llvm, cl, ...etc.}
 * \arg input Numpy file for the model input
 * \arg output Numpy file name to dump the model output as numpy
 */
struct ToolArgs {
  string model;
  string device;
  string input;
  string output;
  bool dump_meta = false;
};

/*!
 * \brief PrintArgs print the contents of ToolArgs
 * \param args ToolArgs structure
 */
void PrintArgs(const ToolArgs& args) {
  LOG(INFO) << "Model         = " << args.model;
  LOG(INFO) << "Device        = " << args.device;
  LOG(INFO) << "Input         = " << args.input;
  LOG(INFO) << "Output        = " << args.output;
  LOG(INFO) << "Dump Metadata = " << ((args.dump_meta) ? ("True") : ("False"));
}

#if defined(__linux__) || defined(__ANDROID__)
/*!
 * \brief CtrlCHandler, exits if Ctrl+C is pressed
 * \param s signal
 */
void CtrlCHandler(int s) {
  LOG(INFO) << "\nUser pressed Ctrl+C, Exiting";
  exit(1);
}

/*!
 * \brief HandleCtrlC Register for handling Ctrl+C event.
 */
void HandleCtrlC() {
  // Ctrl+C handler
  struct sigaction sigIntHandler;
  sigIntHandler.sa_handler = CtrlCHandler;
  sigemptyset(&sigIntHandler.sa_mask);
  sigIntHandler.sa_flags = 0;
  sigaction(SIGINT, &sigIntHandler, nullptr);
}
#endif

/*!
 * \brief GetCmdOption Parse and find the command option.
 * \param argc arg counter
 * \param argv arg values
 * \param option command line option to search for.
 * \param key whether the option itself is a key
 * \return value corresponding to option.
 */
string GetCmdOption(int argc, char* argv[], string option, bool key = false) {
  string cmd;
  for (int i = 1; i < argc; ++i) {
    string arg = argv[i];
    if (arg.find(option) == 0) {
      if (key) {
        cmd = argv[i];
        return cmd;
      }
      // We assume "=" is the end of option.
      ICHECK_EQ(*option.rbegin(), '=');
      cmd = arg.substr(arg.find('=') + 1);
      return cmd;
    }
  }
  return cmd;
}

/*!
 * \brief ParseCmdArgs parses the command line arguments.
 * \param argc arg counter
 * \param argv arg values
 * \param args the output structure which holds the parsed values
 */
void ParseCmdArgs(int argc, char* argv[], struct ToolArgs& args) {
  const string model = GetCmdOption(argc, argv, "--model=");
  if (!model.empty()) {
    args.model = model;
  } else {
    LOG(INFO) << kUsage;
    exit(0);
  }

  const string device = GetCmdOption(argc, argv, "--device=");
  if (!device.empty()) {
    args.device = device;
  } else {
    LOG(INFO) << kUsage;
    exit(0);
  }

  const string input = GetCmdOption(argc, argv, "--input=");
  if (!input.empty()) {
    args.input = input;
  }

  const string output = GetCmdOption(argc, argv, "--output=");
  if (!output.empty()) {
    args.output = output;
  }

  const string pmeta = GetCmdOption(argc, argv, "--dump-meta", true);
  if (!pmeta.empty()) {
    args.dump_meta = true;
  }
}

/*!
 * \brief Loads and executes the model on the given target.
 * \param args tool arguments
 * \return result of the operation.
 */
int ExecuteModel(ToolArgs& args) {
#if defined(__linux__) || defined(__ANDROID__)
  // Ctrl+C handler
  HandleCtrlC();
#endif

  // Initialize TVM Runner
  TVMRunner runner = TVMRunner(args.model, args.device);

  // Load the model
  runner.Load();

  // Query model meta information
  TVMMetaInfo mInfo = runner.GetMetaInfo();

  // Print meta information
  if (args.dump_meta) runner.PrintMetaInfo();

  if (args.input.empty() || args.output.empty()) {
    LOG(INFO) << "Executing dry run ... ";
    // Set random input for all inputs
    for (auto& elem : mInfo.input_info) {
      LOG(INFO) << "Set Random Input for :" << elem.first;
      auto shape = elem.second.first;
      size_t ssize = runner.GetInputMemSize(elem.first);
      char* data = (char*)malloc(ssize);
      LOG(INFO) << "Random Input Size:" << ssize << " bytes";
      runner.SetInput(elem.first, data);
      free(data);
    }

    // Run the model
    runner.Run();

    // Get output and dump a few values
    for (auto& elem : mInfo.output_info) {
      LOG(INFO) << "Get Output for :" << elem.first;
      auto shape = elem.second.first;
      size_t ssize = runner.GetOutputMemSize(elem.first);
      char* data = (char*)malloc(ssize);
      runner.GetOutput(elem.first, data);
      LOG(INFO) << "Output Size:" << ssize << " bytes";
      free(data);
    }
  } else {
    LOG(INFO) << "Executing with Input:" << args.input << " Output:" << args.output;

    // Set input from numpy input
    runner.SetInput(args.input);

    // Run the model
    runner.Run();

    // Get output as numpy dump
    runner.GetOutput(args.output);
  }

  return 0;
}

/*!
 * \brief main The main function.
 * \param argc arg counter
 * \param argv arg values
 * \return result of operation.
 */
int main(int argc, char* argv[]) {
  if (argc <= 1) {
    LOG(INFO) << kUsage;
    return 0;
  }

  ToolArgs args;
  ParseCmdArgs(argc, argv, args);
  PrintArgs(args);

  if (ExecuteModel(args)) {
    PrintArgs(args);
    LOG(INFO) << kUsage;
    return -1;
  }
  return 0;
}
apps/cpp_rtvm/scripts/download_models.py

Lines changed: 36 additions & 0 deletions

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os

LOG = logging.getLogger(__name__)

tmp_dir = "./model_data/"
dload_models = []

# Keras : Resnet50
try:
    from tensorflow.keras.applications.resnet50 import ResNet50

    # Make sure the destination folder exists before saving the model
    os.makedirs(tmp_dir + "keras-resnet50", exist_ok=True)
    model_file_name = "{}/{}".format(tmp_dir + "keras-resnet50", "resnet50.h5")
    model = ResNet50(include_top=True, weights="imagenet", input_shape=(224, 224, 3), classes=1000)
    model.save(model_file_name)
    dload_models.append(model_file_name)
except ImportError:
    LOG.warning("Keras is not installed, skipping Keras models")


print("Models:", dload_models)

‎apps/cpp_rtvm/tvm_runner.cc

Lines changed: 320 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file tvm_runner.cc
 * \brief TVM model runner implementation.
 */

#include "tvm_runner.h"

#include <cnpy.h>

#include <fstream>
#include <streambuf>
#include <string>

namespace tvm {
namespace runtime {

/*!
 * \brief Get the TVM device id corresponding to device string.
 * \param device the target device in string format.
 * \return dl_device corresponding to the device string.
 */
int GetTVMDevice(std::string device) {
  if (!device.compare("cpu")) {
    return static_cast<int>(kDLCPU);
  } else if (!device.compare("llvm")) {
    return static_cast<int>(kDLCPU);
  } else if (!device.compare("cuda")) {
    return static_cast<int>(kDLCUDA);
  } else if (!device.compare("opencl")) {
    return static_cast<int>(kDLOpenCL);
  } else if (!device.compare("vulkan")) {
    return static_cast<int>(kDLVulkan);
  } else if (!device.compare("metal")) {
    return static_cast<int>(kDLMetal);
  } else if (!device.compare("vpi")) {
    return static_cast<int>(kDLVPI);
  } else if (!device.compare("rocm")) {
    return static_cast<int>(kDLROCM);
  } else if (!device.compare("oneapi")) {
    return static_cast<int>(kDLOneAPI);
  } else {
    LOG(FATAL) << "TVMRunner : Unsupported device :" << device;
  }
}

/*!
 * \brief Constructor for TVMRunner.
 * \param path where the tvm compiler artifacts are present.
 * \param device the target device where we need to load the compiled model.
 */
TVMRunner::TVMRunner(std::string path, std::string device) : r_model_path(path), r_device(device) {
  LOG(INFO) << "TVMRunner Constructor:" << r_model_path << " Devices:" << r_device;
}

/*!
 * \brief Load and set up the TVM graph runtime for the given model.
 * \return 0 on success else error code.
 */
int TVMRunner::Load(void) {
  LOG(INFO) << "TVMRunner Load:" << r_model_path;
  // Load the lib file
  r_mod_handle = Module::LoadFromFile((r_model_path + "/mod.so").c_str(), "so");

  // Read model json file
  std::ifstream json_reader((r_model_path + "/mod.json").c_str());
  CHECK(!json_reader.fail()) << "Failed to open json file:" << (r_model_path + "/mod.json").c_str();
  std::string json_str((std::istreambuf_iterator<char>(json_reader)),
                       std::istreambuf_iterator<char>());
  json_reader.close();

  // Get ref to graph executor
  auto f_handle = tvm::runtime::Registry::Get("tvm.graph_executor.create");

  // Create graph runtime
  r_graph_handle = (*f_handle)(json_str, r_mod_handle, GetTVMDevice(r_device), 0);

  // Read params binary file
  std::ifstream params_reader((r_model_path + "/mod.params").c_str(), std::ios::binary);
  CHECK(!params_reader.fail()) << "Failed to open params file:"
                               << (r_model_path + "/mod.params").c_str();
  const std::string params_str((std::istreambuf_iterator<char>(params_reader)),
                               std::istreambuf_iterator<char>());
  params_reader.close();
  TVMByteArray params_arr;
  params_arr.data = params_str.c_str();
  params_arr.size = params_str.length();

  // Load parameters
  r_graph_handle.GetFunction("load_params")(params_arr);

  return 0;
}

/*!
 * \brief Calculate the memory size for the NDArray.
 * \param narr the NDArray object.
 * \return size of the memory in bytes.
 */
inline size_t GetMemSize(NDArray& narr) {
  size_t size = 1;
  for (tvm_index_t i = 0; i < narr->ndim; ++i) {
    size *= static_cast<size_t>(narr->shape[i]);
  }
  size *= (narr->dtype.bits * narr->dtype.lanes + 7) / 8;
  return size;
}

/*!
 * \brief Get the input alloc mem size.
 * \param input_id The input id to query the mem size.
 * \return The memory size.
 */
size_t TVMRunner::GetInputMemSize(std::string input_id) {
  LOG(INFO) << "TVMRunner::GetInputMemSize:" << input_id;

  NDArray in_arr = r_graph_handle.GetFunction("get_input")(input_id);
  auto ssize = GetMemSize(in_arr);

  return ssize;
}

/*!
 * \brief Get the output alloc mem size.
 * \param output_id The output id to query the mem size.
 * \return The memory size.
 */
size_t TVMRunner::GetOutputMemSize(std::string output_id) {
  LOG(INFO) << "TVMRunner::GetOutputMemSize:" << output_id;

  NDArray out_arr = r_graph_handle.GetFunction("get_output")(output_id);
  auto ssize = GetMemSize(out_arr);

  return ssize;
}

/*!
 * \brief Set the model inputs from npz file.
 * \param inputfile the npz file from where we read input tensor data.
 * \return 0 on success else error code.
 */
int TVMRunner::SetInput(std::string inputfile) {
  LOG(INFO) << "TVMRunner::SetInput (Numpy):" << inputfile;
  cnpy::npz_t npz_input = cnpy::npz_load(inputfile);

  for (auto& elem : mInfo.input_info) {
    LOG(INFO) << "Set Numpy Input for :" << elem.first;
    NDArray in_arr = r_graph_handle.GetFunction("get_input")(elem.first);
    auto ssize = GetMemSize(in_arr);

    if (npz_input.find(elem.first) != npz_input.end()) {
      in_arr.CopyFromBytes(npz_input[elem.first].data<char>(), ssize);
    } else {
      LOG(WARNING) << "Couldn't find input " << elem.first << " in npz input file";
    }
  }

  return 0;
}

/*!
 * \brief Set the model input from the given binary buffer.
 * \param input_id input node name.
 * \param raw_input binary input buffer to copy over input NDArray.
 * \return 0 on success else error code.
 */
int TVMRunner::SetInput(std::string input_id, char* raw_input) {
  LOG(INFO) << "TVMRunner::SetInput (Raw)";
  NDArray in_arr = r_graph_handle.GetFunction("get_input")(input_id);
  auto ssize = GetMemSize(in_arr);
  in_arr.CopyFromBytes(raw_input, ssize);
  return 0;
}

/*!
 * \brief Get the model outputs and dump them to npz file.
 * \param outputfile the npz file to where we dump the output data.
 * \return 0 on success else error code.
 */
int TVMRunner::GetOutput(std::string outputfile) {
  LOG(INFO) << "TVMRunner::GetOutput (Numpy):" << outputfile;

  for (auto& elem : mInfo.output_info) {
    LOG(INFO) << "Get Output for :" << elem.first;
    NDArray out_arr = r_graph_handle.GetFunction("get_output")(elem.first);
    auto ssize = GetMemSize(out_arr);
    LOG(INFO) << "Output Size:" << ssize << " bytes";

    // GetMemSize already accounts for the dtype size, so ssize bytes is enough
    void* data = (void*)malloc(ssize);
    out_arr.CopyToBytes(data, ssize);
    std::vector<size_t> shape;

    for (int j = 0; j < out_arr->ndim; ++j) shape.push_back(out_arr->shape[j]);
    if (!elem.second.second.compare("float32")) {
      cnpy::npz_save<float>(outputfile, elem.first, (float*)data, shape, "a");
    } else if (!elem.second.second.compare("int8")) {
      cnpy::npz_save<int8_t>(outputfile, elem.first, (int8_t*)data, shape, "a");
    } else {
      LOG(WARNING) << "DType:" << elem.second.second << " is not supported for npz_save";
    }
    free(data);
  }

  return 0;
}

/*!
 * \brief Get output of the model as a binary buffer.
 * \param output_id output node name to read the data.
 * \param raw_output the buffer to copy the data to.
 * \return 0 on success else error code.
 */
int TVMRunner::GetOutput(std::string output_id, char* raw_output) {
  LOG(INFO) << "TVMRunner::GetOutput (Raw)";
  NDArray out_arr = r_graph_handle.GetFunction("get_output")(output_id);
  auto ssize = GetMemSize(out_arr);
  out_arr.CopyToBytes(raw_output, ssize);
  return 0;
}

/*!
 * \brief Call one cycle of execution for the model.
 * \return 0 on success else error code.
 */
int TVMRunner::Run(void) {
  LOG(INFO) << "TVMRunner::Run";

  r_graph_handle.GetFunction("run")();
  return 0;
}

/*!
 * \brief Query various metadata from the graph runtime.
 * \return the populated TVMMetaInfo structure.
 */
TVMMetaInfo TVMRunner::GetMetaInfo(void) {
  LOG(INFO) << "TVMRunner::GetMetaInfo";

  mInfo.n_inputs = r_graph_handle.GetFunction("get_num_inputs")();
  mInfo.n_outputs = r_graph_handle.GetFunction("get_num_outputs")();

  Map<String, ObjectRef> tvm_input_info = r_graph_handle.GetFunction("get_input_info")();
  auto shape_info = GetRef<Map<String, ObjectRef>>(tvm_input_info["shape"].as<MapNode>());
  auto dtype_info = GetRef<Map<String, ObjectRef>>(tvm_input_info["dtype"].as<MapNode>());
  for (const auto& kv : shape_info) {
    auto stuple = GetRef<ShapeTuple>(kv.second.as<ShapeTupleObj>());
    std::vector<int> vshape;
    vshape.assign(stuple.begin(), stuple.end());
    auto dtype = GetRef<String>(dtype_info[kv.first].as<StringObj>());
    std::pair<std::vector<int>, std::string> value = std::make_pair(vshape, dtype);
    mInfo.input_info.insert({kv.first, value});
  }

  tvm_input_info = r_graph_handle.GetFunction("get_output_info")();
  shape_info = GetRef<Map<String, ObjectRef>>(tvm_input_info["shape"].as<MapNode>());
  dtype_info = GetRef<Map<String, ObjectRef>>(tvm_input_info["dtype"].as<MapNode>());
  for (const auto& kv : shape_info) {
    auto stuple = GetRef<ShapeTuple>(kv.second.as<ShapeTupleObj>());
    std::vector<int> vshape;
    vshape.assign(stuple.begin(), stuple.end());
    auto dtype = GetRef<String>(dtype_info[kv.first].as<StringObj>());
    std::pair<std::vector<int>, std::string> value = std::make_pair(vshape, dtype);
    mInfo.output_info.insert({kv.first, value});
  }

  return mInfo;
}

/*!
 * \brief Print the meta information.
 */
void TVMRunner::PrintMetaInfo(void) {
  LOG(INFO) << "Meta Information:" << r_model_path;
  LOG(INFO) << "    Number of Inputs:" << mInfo.n_inputs;
  LOG(INFO) << "    Number of Outputs:" << mInfo.n_outputs;
  LOG(INFO) << "    Input MetaInfo:";
  for (auto& elem : mInfo.input_info) {
    std::ostringstream stream;
    stream << "[";
    copy(elem.second.first.begin(), elem.second.first.end() - 1,
         std::ostream_iterator<int>(stream, ", "));
    stream << elem.second.first.back() << "]";
    LOG(INFO) << "        Input:" << elem.first;
    LOG(INFO) << "            DType:" << elem.second.second;
    LOG(INFO) << "            Shape:" << stream.str();
  }
  LOG(INFO) << "    Output MetaInfo:";
  for (auto& elem : mInfo.output_info) {
    std::ostringstream stream;
    stream << "[";
    copy(elem.second.first.begin(), elem.second.first.end() - 1,
         std::ostream_iterator<int>(stream, ", "));
    stream << elem.second.first.back() << "]";
    LOG(INFO) << "        Output:" << elem.first;
    LOG(INFO) << "            DType:" << elem.second.second;
    LOG(INFO) << "            Shape:" << stream.str();
  }
}

}  // namespace runtime
}  // namespace tvm

‎apps/cpp_rtvm/tvm_runner.h

Lines changed: 93 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file tvm_runner.h
 * \brief TVM model runner.
 */
#ifndef TVM_APPS_CPP_RTVM_RUNNER_H_
#define TVM_APPS_CPP_RTVM_RUNNER_H_

#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

#include <map>
#include <string>
#include <utility>
#include <vector>

#include "tvm/runtime/c_runtime_api.h"

namespace tvm {
namespace runtime {

/*!
 * \brief Various meta information related to the compiled TVM model.
 */
typedef struct {
 public:
  int n_inputs;
  int n_outputs;
  std::map<std::string, std::pair<std::vector<int>, std::string>> input_info;
  std::map<std::string, std::pair<std::vector<int>, std::string>> output_info;
} TVMMetaInfo;

/*!
 * \brief Encapsulates TVM graph runtime functionality with a simplified API interface.
 */
class TVMRunner {
 public:
  /*! \brief Constructor */
  TVMRunner(std::string path, std::string device);

  /*! \brief Initializes the graph runtime with the compiled model */
  int Load(void);
  /*! \brief Executes one inference cycle */
  int Run(void);
  /*! \brief Sets the inputs from a given npz file */
  int SetInput(std::string);
  /*! \brief Sets an input from binary data */
  int SetInput(std::string, char*);
  /*! \brief Saves the model output into a given npz file */
  int GetOutput(std::string);
  /*! \brief Gets a model output in binary format */
  int GetOutput(std::string, char*);
  /*! \brief Gets the input mem size */
  size_t GetInputMemSize(std::string);
  /*! \brief Gets the output mem size */
  size_t GetOutputMemSize(std::string);
  /*! \brief Populates various meta information from the graph runtime */
  TVMMetaInfo GetMetaInfo(void);
  /*! \brief Prints all meta information */
  void PrintMetaInfo(void);

 private:
  /*! \brief Module handle for the shared object */
  Module r_mod_handle;
  /*! \brief Graph runtime module handle */
  Module r_graph_handle;
  /*! \brief The local model path from where we load the model */
  std::string r_model_path;
  /*! \brief The target device */
  std::string r_device;
  /*! \brief Holds meta information queried from the graph runtime */
  TVMMetaInfo mInfo;
};

}  // namespace runtime
}  // namespace tvm
#endif  // TVM_APPS_CPP_RTVM_RUNNER_H_

‎cmake/config.cmake

Lines changed: 3 additions & 0 deletions

@@ -110,6 +110,9 @@ set(USE_RPC ON)
 # Whether to build the C++ RPC server binary
 set(USE_CPP_RPC OFF)
 
+# Whether to build the C++ native runtime tool binary
+set(USE_CPP_RTVM OFF)
+
 # Whether to build the iOS RPC server application
 set(USE_IOS_RPC OFF)

‎cmake/modules/LibInfo.cmake

Lines changed: 1 addition & 0 deletions

@@ -60,6 +60,7 @@ function(add_lib_info src_file)
   TVM_INFO_USE_CMSISNN="${USE_CMSISNN}"
   TVM_INFO_USE_COREML="${USE_COREML}"
   TVM_INFO_USE_CPP_RPC="${USE_CPP_RPC}"
+  TVM_INFO_USE_CPP_RTVM="${USE_CPP_RTVM}"
   TVM_INFO_USE_CUBLAS="${USE_CUBLAS}"
   TVM_INFO_USE_CUDA="${USE_CUDA}"
   TVM_INFO_USE_CUDNN="${USE_CUDNN}"

‎python/tvm/driver/tvmc/composite_target.py

Lines changed: 5 additions & 0 deletions

@@ -28,6 +28,7 @@
 from tvm.relay.op.contrib.ethosu import partition_for_ethosu
 from tvm.relay.op.contrib.bnns import partition_for_bnns
 from tvm.relay.op.contrib.vitis_ai import partition_for_vitis_ai
+from tvm.relay.op.contrib.clml import partition_for_clml
 
 
 from tvm.driver.tvmc import TVMCException
@@ -71,6 +72,10 @@
         "config_key": "relay.ext.vitis_ai.options",
         "pass_pipeline": partition_for_vitis_ai,
     },
+    "clml": {
+        "config_key": None,
+        "pass_pipeline": partition_for_clml,
+    },
 }

‎python/tvm/relay/op/contrib/clml.py

Lines changed: 1 addition & 1 deletion

@@ -79,7 +79,7 @@ def transform_function(
         return RemoveDropout().visit(func)
 
 
-def partition_for_clml(mod, params=None):
+def partition_for_clml(mod, params=None, **opts):
     """Partition the graph greedily offloading supported
     operators to CLML Library.

‎src/auto_scheduler/search_task.cc

Lines changed: 8 additions & 0 deletions

@@ -102,6 +102,14 @@ HardwareParams HardwareParamsNode::GetDefaultHardwareParams(const Target& target
     int max_vthread_extent = 1;
     return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block,
                           max_threads_per_block, max_vthread_extent, warp_size);
+  } else if (target->GetAttr<String>("device", "") == "adreno") {
+    int max_shared_memory_per_block = 32768;
+    int max_local_memory_per_block = 32768;
+    int max_threads_per_block = 256;
+    int warp_size = 1;
+    int max_vthread_extent = 1;
+    return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block,
+                          max_threads_per_block, max_vthread_extent, warp_size);
   } else {
     // add other opencl target
     auto dev = Device{static_cast<DLDeviceType>(device_type), 0};

‎src/runtime/graph_executor/graph_executor.cc

Lines changed: 43 additions & 1 deletion

@@ -138,6 +138,28 @@ std::tuple<GraphExecutor::ShapeInfo, GraphExecutor::DtypeInfo> GraphExecutor::Ge
   return std::make_tuple(shape_dict, dtype_dict);
 }
 
+/*!
+ * \brief Get the output info of Graph by parsing the output nodes.
+ * \return The shape and dtype tuple.
+ */
+std::tuple<GraphExecutor::ShapeInfo, GraphExecutor::DtypeInfo> GraphExecutor::GetOutputInfo()
+    const {
+  GraphExecutor::ShapeInfo shape_dict;
+  GraphExecutor::DtypeInfo dtype_dict;
+  for (auto out : outputs_) {
+    uint32_t nid = out.node_id;
+    CHECK_LE(nid, nodes_.size());
+    std::string name = nodes_[nid].name;
+    CHECK_LE(nid, attrs_.shape.size());
+    auto shape = attrs_.shape[nid];
+    shape_dict.Set(name, ShapeTuple(shape));
+    CHECK_LE(nid, attrs_.dltype.size());
+    auto dtype = attrs_.dltype[nid];
+    dtype_dict.Set(name, String(dtype));
+  }
+  return std::make_tuple(shape_dict, dtype_dict);
+}
+
 /*!
  * \brief Get the output index given the name of output.
  * \param name The name of the output.
@@ -606,7 +628,19 @@ PackedFunc GraphExecutor::GetFunction(const std::string& name,
       if (args.num_args == 2) {
         this->CopyOutputTo(args[0], args[1]);
       } else {
-        *rv = this->GetOutput(args[0]);
+        int out_idx = -1;
+        if (String::CanConvertFrom(args[0])) {
+          for (size_t i = 0; i < outputs_.size(); i++) {
+            std::string& name = nodes_[outputs_[i].node_id].name;
+            if (args[0].operator String() == name) {
+              out_idx = i;
+            }
+          }
+          CHECK(out_idx != -1) << "Invalid output node:" << args[0].operator String();
+        } else {
+          out_idx = args[0];
+        }
+        *rv = this->GetOutput(out_idx);
       }
     });
   } else if (name == "get_input") {
@@ -682,6 +716,14 @@ PackedFunc GraphExecutor::GetFunction(const std::string& name,
       input_info.Set("dtype", dtype_info);
       *rv = input_info;
     });
+  } else if (name == "get_output_info") {
+    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+      auto [shape_info, dtype_info] = this->GetOutputInfo();
+      Map<String, ObjectRef> input_info;
+      input_info.Set("shape", shape_info);
+      input_info.Set("dtype", dtype_info);
+      *rv = input_info;
+    });
   } else {
     return PackedFunc();
   }

‎src/runtime/graph_executor/graph_executor.h

Lines changed: 6 additions & 0 deletions

@@ -117,6 +117,12 @@ class TVM_DLL GraphExecutor : public ModuleNode {
    */
   std::tuple<ShapeInfo, DtypeInfo> GetInputInfo() const;
 
+  /*!
+   * \brief Get the output info of Graph by parsing the output nodes.
+   * \return The shape and dtype tuple.
+   */
+  std::tuple<ShapeInfo, DtypeInfo> GetOutputInfo() const;
+
   /*!
    * \brief Get the output index given the name of output.
    * \param name The name of the output.

‎src/support/libinfo.cc

Lines changed: 5 additions & 0 deletions

@@ -203,6 +203,10 @@
 #define TVM_INFO_USE_CPP_RPC "NOT-FOUND"
 #endif
 
+#ifndef TVM_INFO_USE_CPP_RTVM
+#define TVM_INFO_USE_CPP_RTVM "NOT-FOUND"
+#endif
+
 #ifndef TVM_INFO_USE_TFLITE
 #define TVM_INFO_USE_TFLITE "NOT-FOUND"
 #endif
@@ -273,6 +277,7 @@ TVM_DLL Map<String, String> GetLibInfo() {
       {"USE_CMSISNN", TVM_INFO_USE_CMSISNN},
       {"USE_COREML", TVM_INFO_USE_COREML},
       {"USE_CPP_RPC", TVM_INFO_USE_CPP_RPC},
+      {"USE_CPP_RTVM", TVM_INFO_USE_CPP_RTVM},
       {"USE_CUBLAS", TVM_INFO_USE_CUBLAS},
       {"USE_CUDA", TVM_INFO_USE_CUDA},
       {"USE_CUDNN", TVM_INFO_USE_CUDNN},

‎tests/scripts/task_build_adreno_bins.sh

Lines changed: 2 additions & 1 deletion

@@ -37,6 +37,7 @@ echo set\(USE_OPENCL ON\) >> config.cmake
 fi
 echo set\(USE_RPC ON\) >> config.cmake
 echo set\(USE_CPP_RPC ON\) >> config.cmake
+echo set\(USE_CPP_RTVM ON\) >> config.cmake
 echo set\(USE_GRAPH_EXECUTOR ON\) >> config.cmake
 echo set\(USE_LIBBACKTRACE AUTO\) >> config.cmake
 echo set\(USE_KALLOC_ALIGNMENT 32\) >> config.cmake
@@ -56,4 +57,4 @@ cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.
 -DCMAKE_C_COMPILER="${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang" \
 -DMACHINE_NAME="aarch64-linux-gnu" ..
 
-make -j$(nproc) tvm_rpc
+make -j$(nproc) tvm_rpc rtvm
