Skip to content

Commit

Permalink
fix: prevent buffer overflow (BOF, Tencent#5734) in the last channel handling
Browse files: browse the repository at this point in the history
Signed-off-by: Junwha Hong <[email protected]>
  • Loading branch information
junwha committed Oct 15, 2024
1 parent 1c7af00 commit c42ea73
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 6 deletions.
51 changes: 48 additions & 3 deletions src/layer/arm/shufflechannel_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
float32x4_t _p0 = vld1q_f32(ptr0);
float32x4_t _p1 = vld1q_f32(ptr1);
Expand All @@ -130,6 +130,21 @@ int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 4;
outptr0 += 4;
}

for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr0 = *ptr1;
ptr1 += 1;
}
else
{
*outptr0 = *ptr0;
ptr0 += 1;
}
outptr0 += 1;
}
}

return 0;
Expand Down Expand Up @@ -364,7 +379,7 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo

ptr1 += 4;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
uint16x4_t _p0 = vld1_u16(ptr0);
uint16x4_t _p1 = vld1_u16(ptr1);
Expand All @@ -378,6 +393,21 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
ptr1 += 8;
outptr0 += 8;
}

for (int i = 0; i < 8; i++)
{
if (i % 2)
{
*outptr0 = *ptr1;
ptr1 += 1;
}
else
{
*outptr0 = *ptr0;
ptr0 += 1;
}
outptr0 += 1;
}
}

return 0;
Expand Down Expand Up @@ -598,7 +628,7 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
uint16x4_t _p0 = vld1_u16(ptr0);
uint16x4_t _p1 = vld1_u16(ptr1);
Expand All @@ -611,6 +641,21 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
ptr1 += 4;
outptr0 += 4;
}

for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr0 = *ptr1;
ptr1 += 1;
}
else
{
*outptr0 = *ptr0;
ptr0 += 1;
}
outptr0 += 1;
}
}

return 0;
Expand Down
51 changes: 48 additions & 3 deletions src/layer/x86/shufflechannel_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 8;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m256 _p0 = _mm256_loadu_ps(ptr0);
__m256 _p1 = _mm256_loadu_ps(ptr1);
Expand All @@ -134,6 +134,21 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 16;
outptr += 16;
}

for (int i = 0; i < 16; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down Expand Up @@ -372,7 +387,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 4;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m128 _p0 = _mm_loadu_ps(ptr0);
__m128 _p1 = _mm_loadu_ps(ptr1);
Expand All @@ -387,6 +402,21 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 8;
outptr += 8;
}

for (int i = 0; i < 8; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down Expand Up @@ -607,7 +637,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m128 _p0 = _mm_loadu_ps(ptr0);
__m128 _p1 = _mm_loadu_ps(ptr1);
Expand All @@ -620,6 +650,21 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 4;
outptr += 4;
}

for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down

0 comments on commit c42ea73

Please sign in to comment.