From 7717f1e225ece890bf699ee2419bf2476ff00810 Mon Sep 17 00:00:00 2001 From: KONNO Kazuhiro Date: Thu, 10 Oct 2024 11:53:54 +0900 Subject: [PATCH] AArch64: Implement arrayTranslateTRTO This commit implements arrayTranslateTRTO (ASCII conversion) for AArch64. Signed-off-by: KONNO Kazuhiro --- compiler/aarch64/codegen/OMRCodeGenerator.cpp | 6 + compiler/aarch64/codegen/OMRTreeEvaluator.cpp | 34 +++++- .../aarch64/runtime/ARM64ArrayTranslate.spp | 112 ++++++++++++++++++ compiler/ras/Debug.cpp | 3 +- compiler/runtime/Helpers.inc | 5 +- 5 files changed, 152 insertions(+), 8 deletions(-) diff --git a/compiler/aarch64/codegen/OMRCodeGenerator.cpp b/compiler/aarch64/codegen/OMRCodeGenerator.cpp index 73ebdeb7227..e0dd59702c2 100644 --- a/compiler/aarch64/codegen/OMRCodeGenerator.cpp +++ b/compiler/aarch64/codegen/OMRCodeGenerator.cpp @@ -196,6 +196,12 @@ OMR::ARM64::CodeGenerator::initialize() cg->setSupportsArraySet(); } + static bool disableTRTO = (feGetEnv("TR_disableTRTO") != NULL); + if (!disableTRTO) + { + cg->setSupportsArrayTranslateTRTO(); + } + static bool disableTRTO255 = (feGetEnv("TR_disableTRTO255") != NULL); if (!disableTRTO255) { diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp index a68fea0df66..7332730c994 100644 --- a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp +++ b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp @@ -6433,26 +6433,38 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener // Number of translated elements is returned TR::Compilation *comp = cg->comp(); + bool arrayTranslateTRTO255 = false; TR_ASSERT_FATAL(!node->isSourceByteArrayTranslate(), "Source is byte[] for arraytranslate"); TR_ASSERT_FATAL(node->isTargetByteArrayTranslate(), "Target is char[] for arraytranslate"); - TR_ASSERT_FATAL(node->getChild(3)->getOpCodeValue() == TR::iconst && node->getChild(3)->getInt() == 0x0ff00ff00, "Non-ISO8859 stop character for arraytranslate"); + 
TR_ASSERT_FATAL(node->getChild(3)->getOpCodeValue() == TR::iconst, "Non-constant stop char for arraytranslate"); + + if (node->getChild(3)->getInt() == 0x0ff00ff00) + { + arrayTranslateTRTO255 = true; + } + else + { + TR_ASSERT_FATAL(node->getChild(3)->getInt() == 0x0ff80ff80, "Unknown stop char for arraytranslate"); + } static bool verboseArrayTranslate = (feGetEnv("TR_verboseArrayTranslate") != NULL); if (verboseArrayTranslate) { - fprintf(stderr, "arrayTranslateTRTO255: %s @ %s\n", + fprintf(stderr, "arrayTranslateTRTO: %s @ %s [isTO255: %d]\n", comp->signature(), - comp->getHotnessName(comp->getMethodHotness()) + comp->getHotnessName(comp->getMethodHotness()), + arrayTranslateTRTO255 ); } TR::Register *inputReg = cg->gprClobberEvaluate(node->getChild(0)); TR::Register *outputReg = cg->gprClobberEvaluate(node->getChild(1)); + TR::Register *stopCharReg = arrayTranslateTRTO255 ? NULL : cg->gprClobberEvaluate(node->getChild(3)); TR::Register *inputLenReg = cg->gprClobberEvaluate(node->getChild(4)); TR::Register *outputLenReg = cg->allocateRegister(); - int numDeps = 10; + int numDeps = arrayTranslateTRTO255 ? 
10 : 12; TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, numDeps, cg->trMemory()); @@ -6461,6 +6473,10 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener deps->addPostCondition(outputLenReg, TR::RealRegister::x0); deps->addPostCondition(outputReg, TR::RealRegister::x1); deps->addPostCondition(inputLenReg, TR::RealRegister::x2); + if (!arrayTranslateTRTO255) + { + deps->addPostCondition(stopCharReg, TR::RealRegister::x3); + } // Clobbered by the helper TR::Register *clobberedReg; @@ -6477,9 +6493,14 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener cg->stopUsingRegister(clobberedReg); deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v2); cg->stopUsingRegister(clobberedReg); + if (!arrayTranslateTRTO255) + { + deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v3); + cg->stopUsingRegister(clobberedReg); + } // Array Translate helper call - TR_RuntimeHelper helper = TR_ARM64arrayTranslateTRTO255; + TR_RuntimeHelper helper = arrayTranslateTRTO255 ? 
TR_ARM64arrayTranslateTRTO255 : TR_ARM64arrayTranslateTRTO; TR::SymbolReference *helperSym = cg->symRefTab()->findOrCreateRuntimeHelper(helper); uintptr_t addr = reinterpret_cast(helperSym->getMethodAddress()); generateImmSymInstruction(cg, TR::InstOpCode::bl, node, addr, deps, helperSym, NULL); @@ -6493,6 +6514,9 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener if (outputReg != node->getChild(1)->getRegister()) cg->stopUsingRegister(outputReg); + if (!arrayTranslateTRTO255 && stopCharReg != node->getChild(3)->getRegister()) + cg->stopUsingRegister(stopCharReg); + if (inputLenReg != node->getChild(4)->getRegister()) cg->stopUsingRegister(inputLenReg); diff --git a/compiler/aarch64/runtime/ARM64ArrayTranslate.spp b/compiler/aarch64/runtime/ARM64ArrayTranslate.spp index 65840c4326d..b7b07f00e4e 100644 --- a/compiler/aarch64/runtime/ARM64ArrayTranslate.spp +++ b/compiler/aarch64/runtime/ARM64ArrayTranslate.spp @@ -23,11 +23,123 @@ #include "aarch64/runtime/arm64asmdefs.inc" + .globl FUNC_LABEL(__arrayTranslateTRTO) .globl FUNC_LABEL(__arrayTranslateTRTO255) .text .align 2 +// ---- +// arrayTranslateTRTO +// ---- +// TO stands for Two bytes to One byte +// +// uint16 input[]; +// uint8 output[]; +// int32 len; +// uint16 mask; // low 16 bits of 0xff00ff00 (ISO8859) or 0xff80ff80 (ASCII) +// +// int32 i = 0; +// while (i < len) { +// uint16 ch = input[i]; +// if ((ch & mask) != 0) break; +// output[i] = ch & 0xFF; +// i++; +// } +// return i; +// +// in: x0: input +// x1: output +// x2: len +// x3: mask +// out: x0: num of translated elements +// trash: x4-x6, v0-v3 + +FUNC_LABEL(__arrayTranslateTRTO): + // preserve output address + mov x6, x1 + // load mask to a SIMD register + dup v3.8h, w3 + cmp w2, #16 + b.cc atTRTO_15 + lsr w4, w2, #4 +atTRTO_16Loop: + // load 16 elements + ldp q0, q1, [x0] + // mask first 8 elements + and v2.16b, v0.16b, v3.16b + // fail when any one of them is non-zero + umaxp v2.4s, v2.4s, v2.4s + mov x5, v2.D[0] + cbnz x5,
atTRTO_Fail + // mask next 8 elements + and v2.16b, v1.16b, v3.16b + // fail when any one of them is non-zero + umaxp v2.4s, v2.4s, v2.4s + mov x5, v2.D[0] + cbnz x5, atTRTO_Fail + // collect lower 8 bits + uzp1 v2.16b, v0.16b, v1.16b + add x0, x0, #32 + subs w4, w4, #1 + // store 16 elements + str q2, [x1], #16 + b.ne atTRTO_16Loop +atTRTO_15: + // 15 elements or less remaining + tst w2, #8 + b.eq atTRTO_7 + // load 8 elements + ldr q0, [x0] + // mask 8 elements + and v2.16b, v0.16b, v3.16b + // fail when any one of them is non-zero + umaxp v2.4s, v2.4s, v2.4s + mov x5, v2.D[0] + cbnz x5, atTRTO_Fail + // collect lower 8 bits + xtn v2.8b, v0.8h + add x0, x0, #16 + // store 8 elements + str d2, [x1], #8 +atTRTO_7: + // 7 elements or less remaining + tst w2, #4 + b.eq atTRTO_3 + // load 4 elements + ldr d0, [x0] + // mask 4 elements + and v2.8b, v0.8b, v3.8b + // fail when any one of them is non-zero + mov x5, v2.D[0] + cbnz x5, atTRTO_Fail + // collect lower 8 bits + xtn v2.8b, v0.8h + add x0, x0, #8 + // store 4 elements + str s2, [x1], #4 +atTRTO_3: + // 3 elements or less remaining + ands w4, w2, #3 +atTRTO_1Loop: + b.eq atTRTO_Done + ldrh w5, [x0], #2 + tst w5, w3 + b.ne atTRTO_Done + subs w4, w4, #1 + strb w5, [x1], #1 + b atTRTO_1Loop +atTRTO_Fail: + ldrh w5, [x0], #2 + tst w5, w3 + b.ne atTRTO_Done + strb w5, [x1], #1 + b atTRTO_Fail +atTRTO_Done: + // number of translated elements + sub x0, x1, x6 + ret + // ---- // arrayTranslateTRTO255 // ---- diff --git a/compiler/ras/Debug.cpp b/compiler/ras/Debug.cpp index 77d68dc8ad4..d2ac4f457b8 100644 --- a/compiler/ras/Debug.cpp +++ b/compiler/ras/Debug.cpp @@ -4240,7 +4240,8 @@ TR_Debug::getRuntimeHelperName(int32_t index) case TR_ARM64backwardArrayCopy: return "__backwardArrayCopy"; case TR_ARM64interfaceCompleteSlot2: return "_interfaceCompleteSlot2"; case TR_ARM64interfaceSlotsUnavailable: return "_interfaceSlotsUnavailable"; - case TR_ARM64PatchGCRHelper: return "_patchGCRHelper" ; + case 
TR_ARM64PatchGCRHelper: return "_patchGCRHelper"; + case TR_ARM64arrayTranslateTRTO: return "__arrayTranslateTRTO"; case TR_ARM64arrayTranslateTRTO255: return "__arrayTranslateTRTO255"; } } diff --git a/compiler/runtime/Helpers.inc b/compiler/runtime/Helpers.inc index e101e2ae8d2..fae1b17c348 100644 --- a/compiler/runtime/Helpers.inc +++ b/compiler/runtime/Helpers.inc @@ -497,8 +497,9 @@ SETVAL(TR_ARM64interfaceCompleteSlot2,TR_FSRH+42) SETVAL(TR_ARM64interfaceSlotsUnavailable,TR_FSRH+43) SETVAL(TR_ARM64PatchGCRHelper,TR_FSRH+44) SETVAL(TR_ARM64fieldWatchHelper,TR_FSRH+45) -SETVAL(TR_ARM64arrayTranslateTRTO255,TR_FSRH+46) -SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+47) +SETVAL(TR_ARM64arrayTranslateTRTO,TR_FSRH+46) +SETVAL(TR_ARM64arrayTranslateTRTO255,TR_FSRH+47) +SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+48) SETVAL(TR_S390longDivide,TR_FSRH) SETVAL(TR_S390interfaceCallHelper,TR_FSRH+1)