Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added corrections to re-enable reciprocal test in math_brute_force suite for relaxed math mode #2221

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 28 additions & 10 deletions test_conformance/math_brute_force/binary_operator_double.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_double *s;
cl_double *s2;

bool reciprocal = strcmp(name, "reciprocal") == 0;
const double reciprocalArrayX[] = { 1.0 };
const double *specialValuesX =
reciprocal ? reciprocalArrayX : specialValues;
size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;

Force64BitFPUPrecision();

cl_event e[VECTOR_SIZE_COUNT];
Expand Down Expand Up @@ -242,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
cl_uint idx = 0;
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

// Test edge cases
Expand All @@ -252,14 +258,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_double *fp2 = (cl_double *)p2;
uint32_t x, y;

x = (job_id * buffer_elements) % specialValuesCount;
x = (job_id * buffer_elements) % specialValuesCountX;
y = (job_id * buffer_elements) / specialValuesCount;

for (; idx < buffer_elements; idx++)
{
fp[idx] = specialValues[x];
fp[idx] = specialValuesX[x];
fp2[idx] = specialValues[y];
if (++x >= specialValuesCount)
++x;
if (x >= specialValuesCountX)
{
x = 0;
y++;
Expand All @@ -271,7 +278,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
// Init any remaining values
for (; idx < buffer_elements; idx++)
{
p[idx] = genrand_int64(d);
p[idx] =
reciprocal ? ((cl_ulong *)specialValuesX)[0] : genrand_int64(d);
p2[idx] = genrand_int64(d);
}

Expand Down Expand Up @@ -375,8 +383,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
s = (cl_double *)gIn + thread_id * buffer_elements;
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
for (size_t j = 0; j < buffer_elements; j++)
r[j] = (cl_double)func.f_ff(s[j], s2[j]);

if (reciprocal)
for (size_t j = 0; j < buffer_elements; j++)
r[j] = (float)func.f_f(s2[j]);
else
for (size_t j = 0; j < buffer_elements; j++)
r[j] = (cl_double)func.f_ff(s[j], s2[j]);

// Read the data back -- no need to wait for the first N-1 buffers but wait
// for the last buffer. This is an in order queue.
Expand Down Expand Up @@ -406,7 +419,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (t[j] != q[j])
{
cl_double test = ((cl_double *)q)[j];
long double correct = func.f_ff(s[j], s2[j]);
long double correct =
reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);

float err = Bruteforce_Ulp_Error_Double(test, correct);
int fail = !(fabsf(err) <= ulps);

Expand Down Expand Up @@ -479,8 +494,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}
else if (IsDoubleSubnormal(s2[j]))
{
long double correct2 = func.f_ff(s[j], 0.0);
long double correct3 = func.f_ff(s[j], -0.0);
long double correct2 =
reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
long double correct3 =
reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);

float err2 =
Bruteforce_Ulp_Error_Double(test, correct2);
float err3 =
Expand Down
70 changes: 51 additions & 19 deletions test_conformance/math_brute_force/binary_operator_float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_float *s2 = 0;
RoundingMode oldRoundMode;

bool reciprocal = strcmp(name, "reciprocal") == 0;
const float reciprocalArrayX[] = { 1.f };
const float *specialValuesX = reciprocal ? reciprocalArrayX : specialValues;
size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;

if (relaxedMode)
{
func = job->f->rfunc;
Expand Down Expand Up @@ -239,23 +244,23 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
cl_uint idx = 0;
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

if (job_id <= (cl_uint)lastSpecialJobIndex)
{
// Insert special values
uint32_t x, y;

x = (job_id * buffer_elements) % specialValuesCount;
x = (job_id * buffer_elements) % specialValuesCountX;
y = (job_id * buffer_elements) / specialValuesCount;

for (; idx < buffer_elements; idx++)
{
p[idx] = ((cl_uint *)specialValues)[x];
p[idx] = ((cl_uint *)specialValuesX)[x];
p2[idx] = ((cl_uint *)specialValues)[y];
++x;
if (x >= specialValuesCount)
if (x >= specialValuesCountX)
{
x = 0;
y++;
Expand All @@ -269,13 +274,19 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
}
else if (relaxedMode && reciprocal)
{
cl_uint p2j = p2[idx] & 0x7fffffff;
// Replace values outside [0x00807d99 (just above 2^-126), 2^126] with QNaN
if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
}
}
}

// Init any remaining values
for (; idx < buffer_elements; idx++)
{
p[idx] = genrand_int32(d);
p[idx] = reciprocal ? ((cl_uint *)specialValuesX)[0] : genrand_int32(d);
p2[idx] = genrand_int32(d);

if (relaxedMode && strcmp(name, "divide") == 0)
Expand All @@ -286,6 +297,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
}
else if (relaxedMode && reciprocal)
{
cl_uint p2j = p2[idx] & 0x7fffffff;
// Replace values outside [0x00807d99 (just above 2^-126), 2^126] with QNaN
if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
}
}

if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
Expand Down Expand Up @@ -402,18 +419,31 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
s2 = (float *)gIn2 + thread_id * buffer_elements;
if (gInfNanSupport)
{
for (size_t j = 0; j < buffer_elements; j++)
r[j] = (float)func.f_ff(s[j], s2[j]);
if (reciprocal)
for (size_t j = 0; j < buffer_elements; j++)
r[j] = (float)func.f_f(s2[j]);
else
for (size_t j = 0; j < buffer_elements; j++)
r[j] = (float)func.f_ff(s[j], s2[j]);
}
else
{
for (size_t j = 0; j < buffer_elements; j++)
{
feclearexcept(FE_OVERFLOW);
r[j] = (float)func.f_ff(s[j], s2[j]);
overflow[j] =
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
}
if (reciprocal)
for (size_t j = 0; j < buffer_elements; j++)
{
feclearexcept(FE_OVERFLOW);
r[j] = (float)func.f_f(s2[j]);
overflow[j] =
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
}
else
for (size_t j = 0; j < buffer_elements; j++)
{
feclearexcept(FE_OVERFLOW);
r[j] = (float)func.f_ff(s[j], s2[j]);
overflow[j] =
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
}
}

if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
Expand Down Expand Up @@ -448,7 +478,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (t[j] != q[j])
{
float test = ((float *)q)[j];
double correct = func.f_ff(s[j], s2[j]);
double correct =
reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);

// Per section 10 paragraph 6, accept any result if an input or
// output is an infinity or NaN or overflow
Expand Down Expand Up @@ -485,7 +516,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}

// retry per section 6.5.3.3
if (IsFloatSubnormal(s[j]))
if (!reciprocal && IsFloatSubnormal(s[j]))
{
double correct2, correct3;
float err2, err3;
Expand Down Expand Up @@ -591,8 +622,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);

correct2 = func.f_ff(s[j], 0.0);
correct3 = func.f_ff(s[j], -0.0);
correct2 =
reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
correct3 =
reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);

// Per section 10 paragraph 6, accept any result if an
// input or output is an infinity or NaN or overflow
Expand Down Expand Up @@ -625,7 +658,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}
}


if (fabsf(err) > tinfo->maxError)
{
tinfo->maxError = fabsf(err);
Expand Down
48 changes: 35 additions & 13 deletions test_conformance/math_brute_force/binary_operator_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
std::vector<float> s(0), s2(0);
RoundingMode oldRoundMode;

bool reciprocal = strcmp(name, "reciprocal") == 0;
const cl_half reciprocalArrayHalfX[] = { 0x3c00 };
const cl_half *specialValuesHalfX =
reciprocal ? reciprocalArrayHalfX : specialValuesHalf;
size_t specialValuesHalfCountX = reciprocal ? 1 : specialValuesHalfCount;

cl_event e[VECTOR_SIZE_COUNT];
cl_half *out[VECTOR_SIZE_COUNT];

Expand Down Expand Up @@ -148,22 +154,23 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements;
cl_uint idx = 0;
int totalSpecialValueCount =
specialValuesHalfCount * specialValuesHalfCount;
specialValuesHalfCountX * specialValuesHalfCount;
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

if (job_id <= (cl_uint)lastSpecialJobIndex)
{
// Insert special values
uint32_t x, y;

x = (job_id * buffer_elements) % specialValuesHalfCount;
x = (job_id * buffer_elements) % specialValuesHalfCountX;
y = (job_id * buffer_elements) / specialValuesHalfCount;

for (; idx < buffer_elements; idx++)
{
p[idx] = specialValuesHalf[x];
p[idx] = specialValuesHalfX[x];
p2[idx] = specialValuesHalf[y];
if (++x >= specialValuesHalfCount)
++x;
if (x >= specialValuesHalfCountX)
{
x = 0;
y++;
Expand All @@ -175,7 +182,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
// Init any remaining values
for (; idx < buffer_elements; idx++)
{
p[idx] = (cl_half)genrand_int32(d);
p[idx] = reciprocal ? ((cl_half *)specialValuesHalfX)[0]
: (cl_half)genrand_int32(d);
p2[idx] = (cl_half)genrand_int32(d);
}
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
Expand Down Expand Up @@ -283,11 +291,23 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
s.resize(buffer_elements);
s2.resize(buffer_elements);

for (size_t j = 0; j < buffer_elements; j++)
if (reciprocal)
{
for (size_t j = 0; j < buffer_elements; j++)
{
s[j] = HTF(p[j]);
s2[j] = HTF(p2[j]);
r[j] = HFF(func.f_f(s2[j]));
}
}
else
{
s[j] = HTF(p[j]);
s2[j] = HTF(p2[j]);
r[j] = HFF(func.f_ff(s[j], s2[j]));
for (size_t j = 0; j < buffer_elements; j++)
{
s[j] = HTF(p[j]);
s2[j] = HTF(p2[j]);
r[j] = HFF(func.f_ff(s[j], s2[j]));
}
}

if (ftz) RestoreFPState(&oldMode);
Expand Down Expand Up @@ -320,7 +340,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
if (r[j] != q[j])
{
float test = HTF(q[j]);
float correct = func.f_ff(s[j], s2[j]);
float correct =
reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);

// Per section 10 paragraph 6, accept any result if an input or
// output is an infinity or NaN or overflow
Expand Down Expand Up @@ -446,9 +467,10 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
double correct2, correct3;
float err2, err3;

correct2 = func.f_ff(s[j], 0.0);
correct3 = func.f_ff(s[j], -0.0);

correct2 =
reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
correct3 =
reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);

// Per section 10 paragraph 6, accept any result if an
// input or output is an infinity or NaN or overflow
Expand Down
20 changes: 19 additions & 1 deletion test_conformance/math_brute_force/function_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@
#define reference_copysign NULL
#define reference_sqrt NULL
#define reference_sqrtl NULL
#define reference_reciprocal NULL
#define reference_reciprocall NULL
#define reference_relaxed_reciprocal NULL

#define reference_divide NULL
#define reference_dividel NULL
#define reference_relaxed_divide NULL
Expand Down Expand Up @@ -346,7 +350,6 @@ const Func functionList[] = {

ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
//ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
Expand Down Expand Up @@ -418,6 +421,21 @@ const Func functionList[] = {
// basic operations
OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
//ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
{ "reciprocal",
"/",
{ (void*)reference_reciprocal },
{ (void*)reference_reciprocall },
{ (void*)reference_relaxed_reciprocal },
2.5f,
0.0f,
0.0f,
3.0f,
2.5f,
INFINITY,
FTZ_OFF,
RELAXED_ON,
binaryOperatorF },
{ "divide",
"/",
{ (void*)reference_divide },
Expand Down
Loading
Loading