From 12fbec55de7a5b1a48909cd8b6338d547961bf72 Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Tue, 12 Sep 2023 12:23:34 -0600 Subject: [PATCH] x86: Implement vbitselect evaluator Signed-off-by: BradleyWood --- compiler/x/amd64/codegen/OMRTreeEvaluator.cpp | 6 --- compiler/x/amd64/codegen/OMRTreeEvaluator.hpp | 1 - compiler/x/codegen/OMRCodeGenerator.cpp | 3 ++ compiler/x/codegen/OMRTreeEvaluator.cpp | 53 +++++++++++++++++++ compiler/x/codegen/OMRTreeEvaluator.hpp | 1 + compiler/x/i386/codegen/OMRTreeEvaluator.cpp | 6 --- compiler/x/i386/codegen/OMRTreeEvaluator.hpp | 1 - fvtest/compilertriltest/VectorTest.cpp | 2 - 8 files changed, 57 insertions(+), 16 deletions(-) diff --git a/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp b/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp index d5c660e8d0a..e9be4e5596b 100644 --- a/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp @@ -1558,12 +1558,6 @@ OMR::X86::AMD64::TreeEvaluator::vcalliEvaluator(TR::Node *node, TR::CodeGenerato return TR::TreeEvaluator::unImpOpEvaluator(node, cg); } -TR::Register* -OMR::X86::AMD64::TreeEvaluator::vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); - } - TR::Register* OMR::X86::AMD64::TreeEvaluator::vcastEvaluator(TR::Node *node, TR::CodeGenerator *cg) { diff --git a/compiler/x/amd64/codegen/OMRTreeEvaluator.hpp b/compiler/x/amd64/codegen/OMRTreeEvaluator.hpp index df539e15ff2..17ecb9a5cd8 100644 --- a/compiler/x/amd64/codegen/OMRTreeEvaluator.hpp +++ b/compiler/x/amd64/codegen/OMRTreeEvaluator.hpp @@ -303,7 +303,6 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::X86::TreeEvaluator static TR::Register *vreturnEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vRegLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vRegStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg); - static TR::Register *vbitselectEvaluator(TR::Node *node, 
TR::CodeGenerator *cg); static TR::Register *vsetelemEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vsplatsEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/x/codegen/OMRCodeGenerator.cpp b/compiler/x/codegen/OMRCodeGenerator.cpp index 9c0a807c444..9cd9329b1a3 100644 --- a/compiler/x/codegen/OMRCodeGenerator.cpp +++ b/compiler/x/codegen/OMRCodeGenerator.cpp @@ -1123,6 +1123,9 @@ bool OMR::X86::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::ILO return false; } break; + case TR::vbitselect: + if (et.isFloatingPoint()) return false; + // Intentional fallthrough case TR::vneg: switch (ot.getVectorLength()) { case TR::VectorLength128: diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index cfab85388f8..d397dca4a05 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -6212,6 +6212,59 @@ OMR::X86::TreeEvaluator::vmbyteswapEvaluator(TR::Node *node, TR::CodeGenerator * return TR::TreeEvaluator::unImpOpEvaluator(node, cg); } +// Evaluates vbitselect as first ^ ((first ^ second) & third): every result bit comes from +// the second child where the third child's bit is 1 and from the first child where it is 0. +TR::Register* +OMR::X86::TreeEvaluator::vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + TR::DataType et = node->getDataType().getVectorElementType(); + TR::VectorLength vl = node->getDataType().getVectorLength(); + TR::Node *firstChild = node->getFirstChild(); + TR::Node *secondChild = node->getSecondChild(); + TR::Node *thirdChild = node->getThirdChild(); + + TR::Register *firstReg = cg->evaluate(firstChild); + TR::Register *secondReg = cg->evaluate(secondChild); + TR::Register *thirdReg = cg->evaluate(thirdChild); + TR::Register *resultReg = cg->allocateRegister(TR_VRF); + + TR_ASSERT_FATAL(et.isIntegral(), "vbitselect is for integer operations"); + + TR::InstOpCode xorOpcode = TR::InstOpCode::PXORRegReg; + TR::InstOpCode andOpcode = TR::InstOpCode::PANDRegReg; + + OMR::X86::Encoding xorEncoding = 
xorOpcode.getSIMDEncoding(&cg->comp()->target().cpu, vl); + OMR::X86::Encoding andEncoding = andOpcode.getSIMDEncoding(&cg->comp()->target().cpu, vl); + + TR_ASSERT_FATAL(xorEncoding != OMR::X86::Bad, "No encoding method for pxor opcode"); + TR_ASSERT_FATAL(andEncoding != OMR::X86::Bad, "No encoding method for pand opcode"); + + // resultReg = first ^ second, emitted directly when a three-register form is available + if (xorEncoding != Legacy) + { + generateRegRegRegInstruction(xorOpcode.getMnemonic(), node, resultReg, firstReg, secondReg, cg, xorEncoding); + } + else + { + // Two-register form: seed resultReg with firstReg, then xor secondReg into it + TR::InstOpCode movOpcode = TR::InstOpCode::MOVDQURegReg; + OMR::X86::Encoding movEncoding = movOpcode.getSIMDEncoding(&cg->comp()->target().cpu, vl); + TR_ASSERT_FATAL(movEncoding != OMR::X86::Bad, "No encoding method for movdqu opcode"); + generateRegRegInstruction(movOpcode.getMnemonic(), node, resultReg, firstReg, cg, movEncoding); + generateRegRegInstruction(xorOpcode.getMnemonic(), node, resultReg, secondReg, cg, xorEncoding); + } + + // resultReg = first ^ ((first ^ second) & third) + generateRegRegInstruction(andOpcode.getMnemonic(), node, resultReg, thirdReg, cg, andEncoding); + generateRegRegInstruction(xorOpcode.getMnemonic(), node, resultReg, firstReg, cg, xorEncoding); + + node->setRegister(resultReg); + cg->decReferenceCount(firstChild); + cg->decReferenceCount(secondChild); + cg->decReferenceCount(thirdChild); + return resultReg; + } + TR::Register* OMR::X86::TreeEvaluator::vcompressbitsEvaluator(TR::Node *node, TR::CodeGenerator *cg) { diff --git a/compiler/x/codegen/OMRTreeEvaluator.hpp b/compiler/x/codegen/OMRTreeEvaluator.hpp index e112009b8dc..a18bb3f284b 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.hpp +++ b/compiler/x/codegen/OMRTreeEvaluator.hpp @@ -381,6 +381,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *vmbitswapEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vbyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vmbyteswapEvaluator(TR::Node *node, 
TR::CodeGenerator *cg); + static TR::Register *vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vcompressbitsEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vmcompressbitsEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vexpandbitsEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/x/i386/codegen/OMRTreeEvaluator.cpp b/compiler/x/i386/codegen/OMRTreeEvaluator.cpp index 8d7038fe509..8deb6712655 100644 --- a/compiler/x/i386/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/i386/codegen/OMRTreeEvaluator.cpp @@ -1637,12 +1637,6 @@ OMR::X86::I386::TreeEvaluator::vcalliEvaluator(TR::Node *node, TR::CodeGenerator return TR::TreeEvaluator::unImpOpEvaluator(node, cg); } -TR::Register* -OMR::X86::I386::TreeEvaluator::vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); - } - TR::Register* OMR::X86::I386::TreeEvaluator::vcastEvaluator(TR::Node *node, TR::CodeGenerator *cg) { diff --git a/compiler/x/i386/codegen/OMRTreeEvaluator.hpp b/compiler/x/i386/codegen/OMRTreeEvaluator.hpp index 2457aca9478..1b779e44067 100644 --- a/compiler/x/i386/codegen/OMRTreeEvaluator.hpp +++ b/compiler/x/i386/codegen/OMRTreeEvaluator.hpp @@ -284,7 +284,6 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::X86::TreeEvaluator static TR::Register *vreturnEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vRegLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vRegStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg); - static TR::Register *vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vsetelemEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vsplatsEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *vstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/fvtest/compilertriltest/VectorTest.cpp 
b/fvtest/compilertriltest/VectorTest.cpp index 83ff6760da5..6ec35d3efd1 100644 --- a/fvtest/compilertriltest/VectorTest.cpp +++ b/fvtest/compilertriltest/VectorTest.cpp @@ -773,8 +773,6 @@ TEST_F(VectorTest, VInt8BitSelect) { SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_X86(MissingImplementation); - SKIP_ON_HAMMER(MissingImplementation); Tril::DefaultCompiler compiler(trees); ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees;