diff --git a/dory/Hardware_targets/PULP/GAP9/Templates/network_c_template.c b/dory/Hardware_targets/PULP/GAP9/Templates/network_c_template.c index 34199ba6..230ac9c2 100644 --- a/dory/Hardware_targets/PULP/GAP9/Templates/network_c_template.c +++ b/dory/Hardware_targets/PULP/GAP9/Templates/network_c_template.c @@ -119,7 +119,7 @@ void ${prefix}execute_layer_fork(void *args) { #endif } -void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""}) +struct ${prefix}network_run_token ${prefix}network_run_async(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec, int initial_dir${", void *L2_input_h" if not l3_supported else ""}) { struct pi_device cluster_dev = {0}; struct pi_cluster_conf conf; @@ -135,8 +135,9 @@ void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final args[1] = (unsigned int) l2_buffer_size; args[2] = (unsigned int) l2_final_output; args[3] = (unsigned int) exec; + args[4] = (unsigned int) initial_dir; % if not l3_supported: - args[4] = (unsigned int) L2_input_h; + args[5] = (unsigned int) L2_input_h; % endif // open cluster... 
pi_cluster_task(&cluster_task, ${prefix}network_run_cluster, args); @@ -149,20 +150,33 @@ void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final #endif cluster_task.slave_stack_size = ${slave_stack}; pi_cluster_send_task_to_cl(&cluster_dev, &cluster_task); - pi_cluster_close(&cluster_dev); + return (struct ${prefix}network_run_token) { + .cluster_dev = cluster_dev + }; +} + +void ${prefix}network_run_wait(struct ${prefix}network_run_token token) +{ + pi_cluster_close(&token.cluster_dev); % if 'Perf_final' in verbose_level: print_perf("Final", ${prefix}cycle_network_execution, ${MACs}); % endif } +void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec, int initial_dir${", void *L2_input_h" if not l3_supported else ""}) +{ + ${prefix}network_run_wait(${prefix}network_run_async(l2_buffer, l2_buffer_size, l2_final_output, exec, initial_dir${", L2_input_h" if not l3_supported else ""})); /* start the run, then pass its token to the wait step */ +} + void ${prefix}network_run_cluster(void *args) { unsigned int * real_args = (unsigned int *) args; void * l2_buffer = (void *) real_args[0]; size_t l2_buffer_size = (size_t) real_args[1]; void * l2_final_output = (void *) real_args[2]; int exec = (int) real_args[3]; + int dir = (int) real_args[4]; % if not l3_supported: - void * L2_input_h = (void *)real_args[4]; + void * L2_input_h = (void *)real_args[5]; % endif /* - initial buffer allocation L2 and L1 @@ -177,7 +191,6 @@ void ${prefix}network_run_cluster(void *args) { void *L3_weights_curr = L3_weights; void *bypass_activations = NULL; - int dir = 1; int residual_number = 0; int bypass_dimension = 0; % if not l3_supported: diff --git a/dory/Hardware_targets/PULP/GAP9/Templates/network_h_template.h b/dory/Hardware_targets/PULP/GAP9/Templates/network_h_template.h deleted file mode 100644 index 31eb3c5c..00000000 --- a/dory/Hardware_targets/PULP/GAP9/Templates/network_h_template.h +++ /dev/null @@ -1,199 +0,0 @@ -/* - * network.h - * Alessio Burrello - * - * Copyright (C) 
2019-2020 University of Bologna - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __${prefix.upper()}NETWORK_H__ -#define __${prefix.upper()}NETWORK_H__ - -<% - l3_supported = DORY_HW_graph[0].HW_description['memory']['levels'] > 2 - single_input = n_inputs==1 -%>\ -% if not l3_supported: -#include "${prefix}weights_definition.h" -% endif -#include - - -% if l3_supported: -void ${prefix}network_terminate(); -void ${prefix}network_initialize(); -% endif -void ${prefix}network_run_cluster(void * args); -void ${prefix}network_run(void *l2_buffer, size_t l2_buffer_size, void *l2_final_output, int exec${", void *L2_input_h" if not l3_supported else ""}); -void ${prefix}execute_layer_fork(void *arg); - -% if l3_supported and not single_input: -static char * ${prefix}Input_names[${n_inputs}] = { \ - % for n in range(n_inputs-1): - "${f"{prefix}inputs_{n}.hex"}", - % endfor - "${f"{prefix}inputs_{n_inputs-1}.hex"}" -}; -% endif - -#ifdef DEFINE_CONSTANTS -% if l3_supported: -// allocation of buffers with parameters needed by the network execution -static const char * L3_weights_files[] = { - ${files_list} -}; -static int L3_weights_size[${weights_number}]; -static int layers_pointers[${len(DORY_HW_graph)}]; -% endif -static char * Layers_name[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -"${node.prefixed_name}"${'' if loop.last else ', '}\ -% endfor -}; -% if l3_supported: -static int L3_input_layers[${len(DORY_HW_graph)}] = {\ -1, -% for 
node in DORY_HW_graph[1:]: -% if node.L3_input != 0: -1${'' if loop.last else ', '}\ -% else: -0${'' if loop.last else ', '}\ -% endif -% endfor -}; -static int L3_output_layers[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -% if node.tiling_dimensions["L3"]["output_dimensions"] != node.tiling_dimensions["L2"]["output_dimensions"]: -1${'' if loop.last else ', '}\ -% else: -0${'' if loop.last else ', '}\ -% endif -% endfor -}; -static int allocate_layer[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -% if node.tiling_dimensions["L3"]["weights_dimensions"] == node.tiling_dimensions["L2"]["weights_dimensions"] and ('FullyConnected' in node.name or 'Conv' in node.name): -1${'' if loop.last else ', '}\ -% else: -0${'' if loop.last else ', '}\ -% endif -% endfor -}; -% else: -static char *Weights_name[${len(DORY_HW_graph)}] = {\ -% for i in range(len(DORY_HW_graph)): -% if 'Conv' in DORY_HW_graph[i].name or 'FullyConnected' in DORY_HW_graph[i].name: -Weights_${DORY_HW_graph[i].prefixed_name}${'' if loop.last else ', '}\ -% else: -"None"${'' if loop.last else ', '}\ -% endif -% endfor -}; -% endif -static int branch_input[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -% if node.branch_in == 1: -1${'' if loop.last else ', '}\ -% else: -0${'' if loop.last else ', '}\ -% endif -% endfor -}; -static int branch_output[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -% if node.branch_out == 1: -1${'' if loop.last else ', '}\ -% else: -0${'' if loop.last else ', '}\ -% endif -% endfor -}; -static int branch_change[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -% if node.branch_change == 1: -1${'' if loop.last else ', '}\ -% else: -0${'' if loop.last else ', '}\ -% endif -% endfor -}; -static int weights_checksum[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -${node.check_sum_w}${'' if loop.last else ', '}\ -% endfor -}; -static int weights_size[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -% if 
l3_supported: -${int((node.tiling_dimensions["L2"]["weight_memory"] + node.tiling_dimensions["L2"]["constants_memory"] + node.tiling_dimensions["L2"]["bias_memory"]) * (1 + int(node.tiling_dimensions["L3"]["weights_dimensions"] != node.tiling_dimensions["L2"]["weights_dimensions"])))}${'' if loop.last else ', '}\ -% else: -${int(node.tiling_dimensions["L2"]["weight_memory"] + node.tiling_dimensions["L2"]["constants_memory"] + node.tiling_dimensions["L2"]["bias_memory"])}${'' if loop.last else ', '}\ -% endif -% endfor -}; -static int activations_checksum[${len(DORY_HW_graph)}][${DORY_HW_graph[0].n_test_inputs}] = {\ -% for i in range(len(DORY_HW_graph)): -{ - % for j in range(DORY_HW_graph[0].n_test_inputs): - ${DORY_HW_graph[i].check_sum_in[j]}${", " if j != DORY_HW_graph[0].n_test_inputs-1 else ""} \ - % endfor -}${"," if i != len(DORY_HW_graph)-1 else ""} -% endfor -}; -static int activations_size[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -% if l3_supported: -${int(node.tiling_dimensions["L2"]["input_activation_memory"] * (1 + int(node.tiling_dimensions["L3"]["input_dimensions"] != node.tiling_dimensions["L2"]["input_dimensions"])))}${'' if loop.last else ', '}\ -% else: -${int(node.tiling_dimensions["L2"]["input_activation_memory"])}${'' if loop.last else ', '}\ -% endif -% endfor -}; -static int activations_out_checksum[${len(DORY_HW_graph)}][${DORY_HW_graph[0].n_test_inputs}] = {\ -% for i in range(len(DORY_HW_graph)): -{ - % for j in range(DORY_HW_graph[0].n_test_inputs): - ${DORY_HW_graph[i].check_sum_out[j]}${", " if j != DORY_HW_graph[0].n_test_inputs-1 else ""} \ - % endfor -}${"," if i != len(DORY_HW_graph)-1 else ""} -% endfor -}; -static int activations_out_size[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -% if l3_supported: -${int(node.tiling_dimensions["L2"]["output_activation_memory"] * (1 + int(node.tiling_dimensions["L3"]["output_dimensions"] != node.tiling_dimensions["L2"]["output_dimensions"])))}${'' if loop.last 
else ', '}\ -% else: -${int(node.tiling_dimensions["L2"]["output_activation_memory"])}${'' if loop.last else ', '}\ -% endif -% endfor -}; -static int layer_with_weights[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -% if 'Conv' in node.name or 'FullyConnected' in node.name: -1${'' if loop.last else ', '}\ -% else: -0${'' if loop.last else ', '}\ -% endif -% endfor -}; -% if 'Yes' in performance: -static int NODEs_MACS[${len(DORY_HW_graph)}] = {\ -% for node in DORY_HW_graph: -${node.MACs}${'' if loop.last else ', '}\ -% endfor -}; -% endif -#endif - -#endif // __NETWORK_H__ diff --git a/dory/Hardware_targets/PULP/GAP9/Templates/network_h_template.h b/dory/Hardware_targets/PULP/GAP9/Templates/network_h_template.h new file mode 120000 index 00000000..246946c0 --- /dev/null +++ b/dory/Hardware_targets/PULP/GAP9/Templates/network_h_template.h @@ -0,0 +1 @@ +../../Common/Templates/network.h.t \ No newline at end of file diff --git a/dory/Hardware_targets/PULP/GAP9_NE16/Templates/network_h_template.h b/dory/Hardware_targets/PULP/GAP9_NE16/Templates/network_h_template.h index 7ea5b4bd..246946c0 120000 --- a/dory/Hardware_targets/PULP/GAP9_NE16/Templates/network_h_template.h +++ b/dory/Hardware_targets/PULP/GAP9_NE16/Templates/network_h_template.h @@ -1 +1 @@ -../../GAP9/Templates/network_h_template.h \ No newline at end of file +../../Common/Templates/network.h.t \ No newline at end of file diff --git a/test_PULP.py b/test_PULP.py index af08f7f8..bccf2fd5 100644 --- a/test_PULP.py +++ b/test_PULP.py @@ -163,7 +163,7 @@ def test_network(network, capsys, compat, appdir): try: proc = subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=360) except subprocess.CalledProcessError as e: - assert False, f"Building application failed with exit status {e.returncode}\nBuild error:\n{e.stderr}" + assert False, f"Building application failed with exit status {e.returncode}\nBuild output:\n{e.stdout}\nBuild error:\n{e.stderr}" except 
subprocess.TimeoutExpired as e: print(f"Test timed out...\nSTDOUT:") if e.output is not None: