Skip to content

Commit

Permalink
Merge pull request eclipse-omr#7102 from BradleyWood/bitwisemasking
Browse files Browse the repository at this point in the history
x86: Support masked bitwise operations for i8, i16
  • Loading branch information
0xdaryl authored Aug 31, 2023
2 parents 54d66f8 + 7cbddeb commit 873ac5d
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 14 deletions.
39 changes: 29 additions & 10 deletions compiler/x/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4065,18 +4065,12 @@ TR::InstOpCode OMR::X86::TreeEvaluator::getNativeSIMDOpcode(TR::ILOpCodes opcode
break;
case TR::vand:
binaryOp = BinaryArithmeticAnd;
// Masking opcodes require lanewise support for each element type, however, int8/int16
// bitwise instructions with masking are not supported without AVX-512. In non-masking
// operations, the element type does not matter.
if (!isMaskOp) elementType = TR::Int32;
break;
case TR::vor:
binaryOp = BinaryArithmeticOr;
if (!isMaskOp) elementType = TR::Int32;
break;
case TR::vxor:
binaryOp = BinaryArithmeticXor;
if (!isMaskOp) elementType = TR::Int32;
break;
case TR::vmmin:
case TR::vmin:
Expand Down Expand Up @@ -4476,6 +4470,24 @@ TR::Register* OMR::X86::TreeEvaluator::vectorBinaryArithmeticEvaluator(TR::Node*
TR::Register *tmpNaNReg = NULL;

bool useRegMemForm = cg->comp()->target().cpu.supportsAVX() && !mask;
bool maskTypeMismatch = false;

if (et == TR::Int8 || et == TR::Int16)
{
switch (node->getOpCode().getVectorOperation())
{
case TR::vand:
case TR::vor:
case TR::vxor:
// There are no native opcodes meant specifically for these element types.
// Therefore, if masking is required, we cannot use a single instruction
// to perform these masked bitwise operations because of the element type mismatch.
maskTypeMismatch = true;
break;
default:
break;
}
}

if (useRegMemForm)
{
Expand Down Expand Up @@ -4540,7 +4552,7 @@ TR::Register* OMR::X86::TreeEvaluator::vectorBinaryArithmeticEvaluator(TR::Node*
TR::Register *rSrcReg = tmpNaNReg ? vectorFPNaNHelper(node, tmpNaNReg, lhsReg, rhsReg, NULL, cg) : rhsReg;
if (maskReg)
{
binaryVectorMaskHelper(nativeOpcode, simdEncoding, node, resultReg, lhsReg, rSrcReg, maskReg, cg);
binaryVectorMaskHelper(nativeOpcode, simdEncoding, node, resultReg, lhsReg, rSrcReg, maskReg, cg, maskTypeMismatch);
}
else
{
Expand All @@ -4551,7 +4563,7 @@ TR::Register* OMR::X86::TreeEvaluator::vectorBinaryArithmeticEvaluator(TR::Node*
else if (maskReg)
{
TR::Register *rSrcReg = tmpNaNReg ? vectorFPNaNHelper(node, tmpNaNReg, lhsReg, rhsReg, NULL, cg) : rhsReg;
binaryVectorMaskHelper(nativeOpcode, simdEncoding, node, resultReg, lhsReg, rSrcReg, maskReg, cg);
binaryVectorMaskHelper(nativeOpcode, simdEncoding, node, resultReg, lhsReg, rSrcReg, maskReg, cg, maskTypeMismatch);
}
else
{
Expand Down Expand Up @@ -5373,7 +5385,8 @@ OMR::X86::TreeEvaluator::binaryVectorMaskHelper(TR::InstOpCode opcode,
TR::Register *lhsReg,
TR::Register *rhsReg,
TR::Register *maskReg,
TR::CodeGenerator *cg)
TR::CodeGenerator *cg,
bool maskTypeMismatch)
{
TR_ASSERT_FATAL(encoding != OMR::X86::Bad, "No suitable encoding method for opcode");
bool vectorMask = maskReg->getKind() == TR_VRF;
Expand All @@ -5393,13 +5406,19 @@ OMR::X86::TreeEvaluator::binaryVectorMaskHelper(TR::InstOpCode opcode,
cg->stopUsingRegister(tmpReg);
return resultReg;
}
else if (vectorMask)
else if (vectorMask && maskTypeMismatch)
{
generateRegRegRegInstruction(opcode.getMnemonic(), node, tmpReg, lhsReg, rhsReg, cg, encoding);
vectorMergeMaskHelper(node, resultReg, tmpReg, maskReg, cg);
cg->stopUsingRegister(tmpReg);
return resultReg;
}
else if (vectorMask)
{
generateRegMaskRegRegInstruction(opcode.getMnemonic(), node, tmpReg, maskReg, lhsReg, rhsReg, cg, encoding);
cg->stopUsingRegister(tmpReg);
return resultReg;
}
else
{
TR::InstOpCode movOpcode = TR::InstOpCode::MOVDQURegReg;
Expand Down
3 changes: 2 additions & 1 deletion compiler/x/codegen/OMRTreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,8 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator
TR::Register *lhsReg,
TR::Register *rhsReg,
TR::Register *maskReg,
TR::CodeGenerator *cg);
TR::CodeGenerator *cg,
bool maskTypeMismatch = false);
static TR::Register *ternaryVectorMaskHelper(TR::InstOpCode opcode,
OMR::X86::Encoding encoding,
TR::Node *node,
Expand Down
6 changes: 3 additions & 3 deletions compiler/x/codegen/X86OpcodeTable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ static const TR::InstOpCode::Mnemonic VectorBinaryArithmeticOpCodesForReg[NumBin
{ TR::InstOpCode::PSUBBRegReg, TR::InstOpCode::PSUBWRegReg, TR::InstOpCode::PSUBDRegReg, TR::InstOpCode::PSUBQRegReg, TR::InstOpCode::SUBPSRegReg, TR::InstOpCode::SUBPDRegReg }, // BinaryArithmeticSub
{ TR::InstOpCode::bad, TR::InstOpCode::PMULLWRegReg, TR::InstOpCode::PMULLDRegReg, TR::InstOpCode::bad, TR::InstOpCode::MULPSRegReg, TR::InstOpCode::MULPDRegReg }, // BinaryArithmeticMul
{ TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::DIVPSRegReg, TR::InstOpCode::DIVPDRegReg }, // BinaryArithmeticDiv
{ TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PANDRegReg, TR::InstOpCode::PANDRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad }, // BinaryArithmeticAnd
{ TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PORRegReg, TR::InstOpCode::PORRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad }, // BinaryArithmeticOr,
{ TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PXORRegReg, TR::InstOpCode::PXORRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad }, // BinaryArithmeticXor
{ TR::InstOpCode::PANDRegReg, TR::InstOpCode::PANDRegReg, TR::InstOpCode::PANDRegReg, TR::InstOpCode::PANDRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad }, // BinaryArithmeticAnd
{ TR::InstOpCode::PORRegReg, TR::InstOpCode::PORRegReg, TR::InstOpCode::PORRegReg, TR::InstOpCode::PORRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad }, // BinaryArithmeticOr,
{ TR::InstOpCode::PXORRegReg, TR::InstOpCode::PXORRegReg, TR::InstOpCode::PXORRegReg, TR::InstOpCode::PXORRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad }, // BinaryArithmeticXor
{ TR::InstOpCode::PMINSBRegReg, TR::InstOpCode::PMINSWRegReg, TR::InstOpCode::PMINSDRegReg, TR::InstOpCode::PMINSQRegReg, TR::InstOpCode::MINPSRegReg, TR::InstOpCode::MINPDRegReg }, // BinaryArithmeticMin
{ TR::InstOpCode::PMAXSBRegReg, TR::InstOpCode::PMAXSWRegReg, TR::InstOpCode::PMAXSDRegReg, TR::InstOpCode::PMAXSQRegReg, TR::InstOpCode::MAXPSRegReg, TR::InstOpCode::MAXPDRegReg }, // BinaryArithmeticMax
{ TR::InstOpCode::bad, TR::InstOpCode::VPSLLVWRegRegReg, TR::InstOpCode::VPSLLVDRegRegReg, TR::InstOpCode::VPSLLVQRegRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad }, // BinaryLogicalShiftLeft
Expand Down

0 comments on commit 873ac5d

Please sign in to comment.