Skip to content

Commit e580795

Browse files
hansely and kiritigowda authored
OpenVX - GFX942: Fix canny test case (#1450)
* goffset
* temp
* debug
* temp
* temp
* temp
* code cleanup
* boundary check
* boundary check
* boundary check
* feedback update
* additional kernels
* fix
* conformance fix
* code cleanup
* code cleanup
* typo fix
* cpu boundary check
---------
Co-authored-by: Kiriti Gowda <[email protected]>
1 parent 1e5a631 commit e580795

File tree

2 files changed

+50
-27
lines changed

2 files changed

+50
-27
lines changed

amd_openvx/openvx/ago/ago_haf_cpu_canny.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -616,12 +616,14 @@ int HafCpu_CannyEdgeTrace_U8_U8XY
616616
const ago_coord2d_short_t offs = dir_offsets[i];
617617
vx_int16 x1 = x + offs.x;
618618
vx_int16 y1 = y + offs.y;
619-
vx_uint8 *pDst = pDstImage + y1*dstImageStrideInBytes + x1;
620-
if (*pDst == 127)
621-
{
622-
*pDst |= 0x80; // *pDst = 255
623-
*((unsigned *)pxyStack) = (y1<<16)|x1;
624-
pxyStack++;
619+
if(x1 >= 0 && x1 < dstWidth && y1 >= 0 && y1 < dstHeight) {
620+
vx_uint8 *pDst = pDstImage + y1*dstImageStrideInBytes + x1;
621+
if (*pDst == 127)
622+
{
623+
*pDst |= 0x80; // *pDst = 255
624+
*((unsigned *)pxyStack) = (y1<<16)|x1;
625+
pxyStack++;
626+
}
625627
}
626628
}
627629
}

amd_openvx/openvx/hipvx/vision_kernels.cpp

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,10 @@ Hip_CannySobel_U16_U8_3x3_L1NORM(uint dstWidth, uint dstHeight,
4242
{ // load 136x18 bytes into local memory using 16x16 workgroup
4343
int loffset = ly * 136 + (lx << 3);
4444
int goffset = (y - 1) * srcImageStrideInBytes + x - 4;
45-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
45+
if (goffset >= 0) {
46+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
47+
}
48+
4649
bool doExtraLoad = false;
4750
if (ly < 2) {
4851
loffset += 16 * 136;
@@ -54,7 +57,7 @@ Hip_CannySobel_U16_U8_3x3_L1NORM(uint dstWidth, uint dstHeight,
5457
goffset = (y - ly + id - 1) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
5558
doExtraLoad = (id < 18) ? true : false;
5659
}
57-
if (doExtraLoad) {
60+
if (doExtraLoad && goffset >= 0) {
5861
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
5962
}
6063
__syncthreads();
@@ -267,7 +270,9 @@ Hip_CannySobel_U16_U8_5x5_L1NORM(uint dstWidth, uint dstHeight,
267270
{ // load 136x20 bytes into local memory using 16x16 workgroup
268271
int loffset = ly * 136 + (lx << 3);
269272
int goffset = (y - 2) * srcImageStrideInBytes + x - 4;
270-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
273+
if (goffset >= 0) {
274+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
275+
}
271276
bool doExtraLoad = false;
272277
if (ly < 4) {
273278
loffset += 16 * 136;
@@ -279,7 +284,7 @@ Hip_CannySobel_U16_U8_5x5_L1NORM(uint dstWidth, uint dstHeight,
279284
goffset = (y - ly + id - 2) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
280285
doExtraLoad = (id < 20) ? true : false;
281286
}
282-
if (doExtraLoad) {
287+
if (doExtraLoad && goffset >= 0) {
283288
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
284289
}
285290
__syncthreads();
@@ -759,7 +764,9 @@ Hip_CannySobel_U16_U8_7x7_L1NORM(uint dstWidth, uint dstHeight,
759764
{ // load 136x22 bytes into local memory using 16x16 workgroup
760765
int loffset = ly * 136 + (lx << 3);
761766
int goffset = (y - 3) * srcImageStrideInBytes + x - 4;
762-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
767+
if (goffset >= 0) {
768+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
769+
}
763770
bool doExtraLoad = false;
764771
if (ly < 6) {
765772
loffset += 16 * 136;
@@ -771,7 +778,7 @@ Hip_CannySobel_U16_U8_7x7_L1NORM(uint dstWidth, uint dstHeight,
771778
goffset = (y - ly + id - 3) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
772779
doExtraLoad = (id < 22) ? true : false;
773780
}
774-
if (doExtraLoad) {
781+
if (doExtraLoad && goffset >= 0) {
775782
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
776783
}
777784
__syncthreads();
@@ -1646,7 +1653,9 @@ Hip_CannySobel_U16_U8_3x3_L2NORM(uint dstWidth, uint dstHeight,
16461653
{ // load 136x18 bytes into local memory using 16x16 workgroup
16471654
int loffset = ly * 136 + (lx << 3);
16481655
int goffset = (y - 1) * srcImageStrideInBytes + x - 4;
1649-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
1656+
if (goffset >= 0) {
1657+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
1658+
}
16501659
bool doExtraLoad = false;
16511660
if (ly < 2) {
16521661
loffset += 16 * 136;
@@ -1658,7 +1667,7 @@ Hip_CannySobel_U16_U8_3x3_L2NORM(uint dstWidth, uint dstHeight,
16581667
goffset = (y - ly + id - 1) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
16591668
doExtraLoad = (id < 18) ? true : false;
16601669
}
1661-
if (doExtraLoad) {
1670+
if (doExtraLoad && goffset >= 0) {
16621671
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
16631672
}
16641673
__syncthreads();
@@ -1871,7 +1880,9 @@ Hip_CannySobel_U16_U8_5x5_L2NORM(uint dstWidth, uint dstHeight,
18711880
{ // load 136x20 bytes into local memory using 16x16 workgroup
18721881
int loffset = ly * 136 + (lx << 3);
18731882
int goffset = (y - 2) * srcImageStrideInBytes + x - 4;
1874-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
1883+
if (goffset >= 0) {
1884+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
1885+
}
18751886
bool doExtraLoad = false;
18761887
if (ly < 4) {
18771888
loffset += 16 * 136;
@@ -1883,7 +1894,7 @@ Hip_CannySobel_U16_U8_5x5_L2NORM(uint dstWidth, uint dstHeight,
18831894
goffset = (y - ly + id - 2) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
18841895
doExtraLoad = (id < 20) ? true : false;
18851896
}
1886-
if (doExtraLoad) {
1897+
if (doExtraLoad && goffset >= 0) {
18871898
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
18881899
}
18891900
__syncthreads();
@@ -2361,7 +2372,9 @@ Hip_CannySobel_U16_U8_7x7_L2NORM(uint dstWidth, uint dstHeight,
23612372
{ // load 136x22 bytes into local memory using 16x16 workgroup
23622373
int loffset = ly * 136 + (lx << 3);
23632374
int goffset = (y - 3) * srcImageStrideInBytes + x - 4;
2364-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
2375+
if (goffset >= 0) {
2376+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
2377+
}
23652378
bool doExtraLoad = false;
23662379
if (ly < 6) {
23672380
loffset += 16 * 136;
@@ -2373,7 +2386,7 @@ Hip_CannySobel_U16_U8_7x7_L2NORM(uint dstWidth, uint dstHeight,
23732386
goffset = (y - ly + id - 3) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
23742387
doExtraLoad = (id < 22) ? true : false;
23752388
}
2376-
if (doExtraLoad) {
2389+
if (doExtraLoad && goffset >= 0) {
23772390
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
23782391
}
23792392
__syncthreads();
@@ -3263,7 +3276,7 @@ Hip_CannySuppThreshold_U8XY_U16_3x3(uint dstWidth, uint dstHeight,
32633276
goffset = (y - ly + id - 1) * srcImageStrideInBytes + ((x - lx) << 3) + 124;
32643277
doExtraLoad = (id < 18) ? true : false;
32653278
}
3266-
if (doExtraLoad) {
3279+
if (doExtraLoad && goffset >= 0) {
32673280
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
32683281
}
32693282
__syncthreads();
@@ -3715,7 +3728,9 @@ Hip_HarrisSobel_HG3_U8_3x3(uint dstWidth, uint dstHeight,
37153728
{ // load 136x18 bytes into local memory using 16x16 workgroup
37163729
int loffset = ly * 136 + (lx << 3);
37173730
int goffset = (y - 1) * srcImageStrideInBytes + x - 4;
3718-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
3731+
if (goffset >= 0) {
3732+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
3733+
}
37193734
bool doExtraLoad = false;
37203735
if (ly < 2) {
37213736
loffset += 16 * 136;
@@ -3727,7 +3742,7 @@ Hip_HarrisSobel_HG3_U8_3x3(uint dstWidth, uint dstHeight,
37273742
goffset = (y - ly + id - 1) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
37283743
doExtraLoad = (id < 18) ? true : false;
37293744
}
3730-
if (doExtraLoad) {
3745+
if (doExtraLoad && goffset >= 0) {
37313746
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
37323747
}
37333748
__syncthreads();
@@ -3933,7 +3948,9 @@ Hip_HarrisSobel_HG3_U8_5x5(uint dstWidth, uint dstHeight,
39333948
{ // load 136x20 bytes into local memory using 16x16 workgroup
39343949
int loffset = ly * 136 + (lx << 3);
39353950
int goffset = (y - 2) * srcImageStrideInBytes + x - 4;
3936-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
3951+
if (goffset >= 0) {
3952+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
3953+
}
39373954
bool doExtraLoad = false;
39383955
if (ly < 4) {
39393956
loffset += 16 * 136;
@@ -3945,7 +3962,7 @@ Hip_HarrisSobel_HG3_U8_5x5(uint dstWidth, uint dstHeight,
39453962
goffset = (y - ly + id - 2) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
39463963
doExtraLoad = (id < 20) ? true : false;
39473964
}
3948-
if (doExtraLoad) {
3965+
if (doExtraLoad && goffset >= 0) {
39493966
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
39503967
}
39513968
__syncthreads();
@@ -4412,7 +4429,9 @@ Hip_HarrisSobel_HG3_U8_7x7(uint dstWidth, uint dstHeight,
44124429
{ // load 136x22 bytes into local memory using 16x16 workgroup
44134430
int loffset = ly * 136 + (lx << 3);
44144431
int goffset = (y - 3) * srcImageStrideInBytes + x - 4;
4415-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
4432+
if (goffset >= 0) {
4433+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
4434+
}
44164435
bool doExtraLoad = false;
44174436
if (ly < 6) {
44184437
loffset += 16 * 136;
@@ -4424,7 +4443,7 @@ Hip_HarrisSobel_HG3_U8_7x7(uint dstWidth, uint dstHeight,
44244443
goffset = (y - ly + id - 3) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
44254444
doExtraLoad = (id < 22) ? true : false;
44264445
}
4427-
if (doExtraLoad) {
4446+
if (doExtraLoad && goffset >= 0) {
44284447
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
44294448
}
44304449
__syncthreads();
@@ -6297,7 +6316,9 @@ Hip_NonMaxSupp_XY_ANY_3x3(char *pDstList, uint dstListOffset, uint capacityOfLis
62976316
{ // load 136x18 bytes into local memory using 16x16 workgroup
62986317
int loffset = ly * 136 + (lx << 3);
62996318
int goffset = (gy - 1) * srcImageStrideInBytes + (gx << 3) - 4;
6300-
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
6319+
if (goffset >= 0) {
6320+
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
6321+
}
63016322
bool doExtraLoad = false;
63026323
if (ly < 2) {
63036324
loffset += 16 * 136;
@@ -6309,7 +6330,7 @@ Hip_NonMaxSupp_XY_ANY_3x3(char *pDstList, uint dstListOffset, uint capacityOfLis
63096330
goffset = (gy - ly + id - 1) * srcImageStrideInBytes + ((gx - lx) << 3) + 124;
63106331
doExtraLoad = (id < 18) ? true : false;
63116332
}
6312-
if (doExtraLoad) {
6333+
if (doExtraLoad && goffset >= 0) {
63136334
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
63146335
}
63156336
__syncthreads();

0 commit comments

Comments
 (0)