Skip to content

Support cloning up to 7 times #1299

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions hls4ml/backends/fpga/passes/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ def transform(self, model, node):
n_outputs = len(output_map[output]) + in_output
if n_outputs == 1:
continue
if n_outputs > 3:
if n_outputs > 7:
msg = f'ERROR: Cloning output {output} of {node.class_name}\
({node.name}) more than 3 times not currently supported'
({node.name}) more than 7 times not currently supported'
raise ValueError(msg)

out_var = node.get_output_variable(output)
Expand Down
94 changes: 94 additions & 0 deletions hls4ml/templates/catapult/nnet_utils/nnet_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,100 @@ void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<
}
}

// Fan one input channel out to four output channels.
//
// Each pack read from `data` is copied element by element (data_T::size
// elements) into a res_T pack, and that same pack is written to res1..res4 in
// order. When __SYNTHESIS__ is not defined the body repeats for as long as
// `data` reports at least one available item; when it is defined the body runs
// once per call. The template parameter N is not referenced in the body.
template <class data_T, class res_T, int N>
void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<res_T> &res2, ac_channel<res_T> &res3,
                  ac_channel<res_T> &res4) {
#ifndef __SYNTHESIS__
    while (data.available(1))
#endif
    {
        data_T src_pack = data.read();
        res_T dst_pack;

    ClonePack:
        for (int idx = 0; idx < data_T::size; ++idx) {
            dst_pack[idx] = src_pack[idx];
        }

        // One converted pack is shared by every destination channel.
        res1.write(dst_pack);
        res2.write(dst_pack);
        res3.write(dst_pack);
        res4.write(dst_pack);
    }
}

// Fan one input channel out to five output channels.
//
// Each pack read from `data` is copied element by element (data_T::size
// elements) into a res_T pack, and that same pack is written to res1..res5 in
// order. When __SYNTHESIS__ is not defined the body repeats for as long as
// `data` reports at least one available item; when it is defined the body runs
// once per call. The template parameter N is not referenced in the body.
template <class data_T, class res_T, int N>
void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<res_T> &res2, ac_channel<res_T> &res3,
                  ac_channel<res_T> &res4, ac_channel<res_T> &res5) {
#ifndef __SYNTHESIS__
    while (data.available(1))
#endif
    {
        data_T src_pack = data.read();
        res_T dst_pack;

    ClonePack:
        for (int idx = 0; idx < data_T::size; ++idx) {
            dst_pack[idx] = src_pack[idx];
        }

        // One converted pack is shared by every destination channel.
        res1.write(dst_pack);
        res2.write(dst_pack);
        res3.write(dst_pack);
        res4.write(dst_pack);
        res5.write(dst_pack);
    }
}

// Fan one input channel out to six output channels.
//
// Each pack read from `data` is copied element by element (data_T::size
// elements) into a res_T pack, and that same pack is written to res1..res6 in
// order. When __SYNTHESIS__ is not defined the body repeats for as long as
// `data` reports at least one available item; when it is defined the body runs
// once per call. The template parameter N is not referenced in the body.
template <class data_T, class res_T, int N>
void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<res_T> &res2, ac_channel<res_T> &res3,
                  ac_channel<res_T> &res4, ac_channel<res_T> &res5, ac_channel<res_T> &res6) {
#ifndef __SYNTHESIS__
    while (data.available(1))
#endif
    {
        data_T src_pack = data.read();
        res_T dst_pack;

    ClonePack:
        for (int idx = 0; idx < data_T::size; ++idx) {
            dst_pack[idx] = src_pack[idx];
        }

        // One converted pack is shared by every destination channel.
        res1.write(dst_pack);
        res2.write(dst_pack);
        res3.write(dst_pack);
        res4.write(dst_pack);
        res5.write(dst_pack);
        res6.write(dst_pack);
    }
}

// Fan one input channel out to seven output channels.
//
// Each pack read from `data` is copied element by element (data_T::size
// elements) into a res_T pack, and that same pack is written to res1..res7 in
// order. When __SYNTHESIS__ is not defined the body repeats for as long as
// `data` reports at least one available item; when it is defined the body runs
// once per call. The template parameter N is not referenced in the body.
template <class data_T, class res_T, int N>
void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<res_T> &res2, ac_channel<res_T> &res3,
                  ac_channel<res_T> &res4, ac_channel<res_T> &res5, ac_channel<res_T> &res6, ac_channel<res_T> &res7) {
#ifndef __SYNTHESIS__
    while (data.available(1))
#endif
    {
        data_T src_pack = data.read();
        res_T dst_pack;

    ClonePack:
        for (int idx = 0; idx < data_T::size; ++idx) {
            dst_pack[idx] = src_pack[idx];
        }

        // One converted pack is shared by every destination channel.
        res1.write(dst_pack);
        res2.write(dst_pack);
        res3.write(dst_pack);
        res4.write(dst_pack);
        res5.write(dst_pack);
        res6.write(dst_pack);
        res7.write(dst_pack);
    }
}

template <class data_T, class res_T, int N> void repack_stream(ac_channel<data_T> &data, ac_channel<res_T> &res) {
if (data_T::size == res_T::size) {
for (int i = 0; i < N / data_T::size; i++) {
Expand Down
153 changes: 153 additions & 0 deletions hls4ml/templates/oneapi/firmware/nnet_utils/nnet_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,159 @@ template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, in
}
}

template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, class res4_pipe, int N> void clone_stream() {
using data_T = typename ExtractPipeType<data_pipe>::value_type;
using res1_T = typename ExtractPipeType<res1_pipe>::value_type;
using res2_T = typename ExtractPipeType<res2_pipe>::value_type;
using res3_T = typename ExtractPipeType<res3_pipe>::value_type;
using res4_T = typename ExtractPipeType<res4_pipe>::value_type;
constexpr auto datasize = std::tuple_size<data_T>{};
CloneLoop:
[[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) {
data_T in_data = data_pipe::read();
res1_T out_data1;
res2_T out_data2;
res3_T out_data3;
res4_T out_data4;

ClonePack:
#pragma unroll
for (int j = 0; j < datasize; j++) {
out_data1[j] = in_data[j];
out_data2[j] = in_data[j];
out_data3[j] = in_data[j];
out_data4[j] = in_data[j];
}

res1_pipe::write(out_data1);
res2_pipe::write(out_data2);
res3_pipe::write(out_data3);
res4_pipe::write(out_data4);
}
}

template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, class res4_pipe, class res5_pipe, int N>
void clone_stream() {
using data_T = typename ExtractPipeType<data_pipe>::value_type;
using res1_T = typename ExtractPipeType<res1_pipe>::value_type;
using res2_T = typename ExtractPipeType<res2_pipe>::value_type;
using res3_T = typename ExtractPipeType<res3_pipe>::value_type;
using res4_T = typename ExtractPipeType<res4_pipe>::value_type;
using res5_T = typename ExtractPipeType<res5_pipe>::value_type;
constexpr auto datasize = std::tuple_size<data_T>{};
CloneLoop:
[[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) {
data_T in_data = data_pipe::read();
res1_T out_data1;
res2_T out_data2;
res3_T out_data3;
res4_T out_data4;
res5_T out_data5;

ClonePack:
#pragma unroll
for (int j = 0; j < datasize; j++) {
out_data1[j] = in_data[j];
out_data2[j] = in_data[j];
out_data3[j] = in_data[j];
out_data4[j] = in_data[j];
out_data5[j] = in_data[j];
}

res1_pipe::write(out_data1);
res2_pipe::write(out_data2);
res3_pipe::write(out_data3);
res4_pipe::write(out_data4);
res5_pipe::write(out_data5);
}
}

template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, class res4_pipe, class res5_pipe,
class res6_pipe, int N>
void clone_stream() {
using data_T = typename ExtractPipeType<data_pipe>::value_type;
using res1_T = typename ExtractPipeType<res1_pipe>::value_type;
using res2_T = typename ExtractPipeType<res2_pipe>::value_type;
using res3_T = typename ExtractPipeType<res3_pipe>::value_type;
using res4_T = typename ExtractPipeType<res4_pipe>::value_type;
using res5_T = typename ExtractPipeType<res5_pipe>::value_type;
using res6_T = typename ExtractPipeType<res6_pipe>::value_type;
constexpr auto datasize = std::tuple_size<data_T>{};
CloneLoop:
[[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) {
data_T in_data = data_pipe::read();
res1_T out_data1;
res2_T out_data2;
res3_T out_data3;
res4_T out_data4;
res5_T out_data5;
res6_T out_data6;

ClonePack:
#pragma unroll
for (int j = 0; j < datasize; j++) {
out_data1[j] = in_data[j];
out_data2[j] = in_data[j];
out_data3[j] = in_data[j];
out_data4[j] = in_data[j];
out_data5[j] = in_data[j];
out_data6[j] = in_data[j];
}

res1_pipe::write(out_data1);
res2_pipe::write(out_data2);
res3_pipe::write(out_data3);
res4_pipe::write(out_data4);
res5_pipe::write(out_data5);
res6_pipe::write(out_data6);
}
}

// Duplicate the contents of data_pipe into seven result pipes.
//
// Runs N / std::tuple_size<data_T> iterations. Each iteration reads one pack
// from data_pipe, copies it element by element into a separate pack per result
// pipe (each with that pipe's own value type), and writes exactly one pack to
// each of res1_pipe..res7_pipe, in order.
template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, class res4_pipe, class res5_pipe,
          class res6_pipe, class res7_pipe, int N>
void clone_stream() {
    using data_T = typename ExtractPipeType<data_pipe>::value_type;
    using res1_T = typename ExtractPipeType<res1_pipe>::value_type;
    using res2_T = typename ExtractPipeType<res2_pipe>::value_type;
    using res3_T = typename ExtractPipeType<res3_pipe>::value_type;
    using res4_T = typename ExtractPipeType<res4_pipe>::value_type;
    using res5_T = typename ExtractPipeType<res5_pipe>::value_type;
    using res6_T = typename ExtractPipeType<res6_pipe>::value_type;
    using res7_T = typename ExtractPipeType<res7_pipe>::value_type;

    // Number of elements carried by one input pack.
    constexpr auto datasize = std::tuple_size<data_T>{};

CloneLoop:
    [[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) {
        data_T in_data = data_pipe::read();
        res1_T out_data1;
        res2_T out_data2;
        res3_T out_data3;
        res4_T out_data4;
        res5_T out_data5;
        res6_T out_data6;
        res7_T out_data7;

    ClonePack:
        #pragma unroll
        for (int j = 0; j < datasize; j++) {
            out_data1[j] = in_data[j];
            out_data2[j] = in_data[j];
            out_data3[j] = in_data[j];
            out_data4[j] = in_data[j];
            out_data5[j] = in_data[j];
            out_data6[j] = in_data[j];
            out_data7[j] = in_data[j];
        }

        res1_pipe::write(out_data1);
        res2_pipe::write(out_data2);
        res3_pipe::write(out_data3);
        res4_pipe::write(out_data4);
        res5_pipe::write(out_data5);
        res6_pipe::write(out_data6);
        // The seventh copy goes to its own pipe; sending it to res6_pipe would
        // duplicate data there and leave res7_pipe's consumer with nothing.
        res7_pipe::write(out_data7);
    }
}

template <class data_pipe, class res_pipe, int N> void repack_stream() {
using data_T = typename ExtractPipeType<data_pipe>::value_type;
using res_T = typename ExtractPipeType<res_pipe>::value_type;
Expand Down
Loading
Loading