Skip to content

Commit

Permalink
AArch64: Implement arraytranslateTRTO255
Browse files Browse the repository at this point in the history
This commit implements arraytranslateTRTO255 for AArch64.

Signed-off-by: KONNO Kazuhiro <[email protected]>
  • Loading branch information
knn-k committed Oct 31, 2024
1 parent 03bf478 commit ea32e0b
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 10 deletions.
3 changes: 2 additions & 1 deletion compiler/aarch64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ compiler_library(aarch64
${CMAKE_CURRENT_LIST_DIR}/codegen/UnaryEvaluator.cpp
${CMAKE_CURRENT_LIST_DIR}/env/OMRCPU.cpp
${CMAKE_CURRENT_LIST_DIR}/env/OMRDebugEnv.cpp
${CMAKE_CURRENT_LIST_DIR}/runtime/ARM64arrayCopy.spp
${CMAKE_CURRENT_LIST_DIR}/runtime/ARM64ArrayCopy.spp
${CMAKE_CURRENT_LIST_DIR}/runtime/ARM64ArrayTranslate.spp
${CMAKE_CURRENT_LIST_DIR}/runtime/CodeSync.cpp
)
15 changes: 11 additions & 4 deletions compiler/aarch64/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,17 @@ OMR::ARM64::CodeGenerator::initialize()
cg->setSupportsArrayCmpLen();
}
}
if (!comp->getOption(TR_DisableArraySetOpts))
{
cg->setSupportsArraySet();
}

if (!comp->getOption(TR_DisableArraySetOpts))
{
cg->setSupportsArraySet();
}

static bool disableTRTO255 = (feGetEnv("TR_disableTRTO255") != NULL);
if (!disableTRTO255)
{
cg->setSupportsArrayTranslateTRTO255();
}
}

void
Expand Down
84 changes: 80 additions & 4 deletions compiler/aarch64/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6400,10 +6400,86 @@ OMR::ARM64::TreeEvaluator::arraytranslateAndTestEvaluator(TR::Node *node, TR::Co

/**
 * Evaluates an arraytranslate node by calling the TR_ARM64arrayTranslateTRTO255
 * runtime helper.
 *
 * Only the form accepted by the fatal assertions below is supported: the source
 * is not a byte array, the target is a byte array, and child (3) is the iconst
 * 0xff00ff00 (ISO8859) stop character.
 *
 * @param node the arraytranslate node; its child layout is described in the body
 * @param cg   the code generator
 * @return the register that receives the helper result from x0
 *         (the number of translated elements)
 */
TR::Register *
OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
// NOTE(review): this stub body and the full body that follows both appear in this listing;
// the stub looks like the pre-change implementation -- confirm which one is live.
// TODO:ARM64: Enable TR::TreeEvaluator::arraytranslateEvaluator in compiler/aarch64/codegen/TreeEvaluatorTable.hpp when Implemented.
return OMR::ARM64::TreeEvaluator::unImpOpEvaluator(node, cg);
}
{
// tree looks as follows:
// arraytranslate
// (0) input ptr
// (1) output ptr
// (2) translation table (dummy)
// (3) stop character (terminal character, either 0xff00ff00 (ISO8859) or 0xff80ff80 (ASCII))
// (4) input length (in elements)
// (5) stopping char (dummy)
//
// Number of translated elements is returned

TR::Compilation *comp = cg->comp();

// Anything other than the non-byte[] source -> byte[] target form with the
// ISO8859 stop character is treated as a fatal error rather than handled.
TR_ASSERT_FATAL(!node->isSourceByteArrayTranslate(), "Source is byte[] for arraytranslate");
TR_ASSERT_FATAL(node->isTargetByteArrayTranslate(), "Target is char[] for arraytranslate");
TR_ASSERT_FATAL(node->getChild(3)->getOpCodeValue() == TR::iconst && node->getChild(3)->getInt() == 0x0ff00ff00, "Non-ISO8859 stop character for arraytranslate");

// Optional diagnostic: when feGetEnv finds TR_verboseArrayTranslate, report the
// method being compiled and its hotness on stderr each time this evaluator runs.
static bool verboseArrayTranslate = (feGetEnv("TR_verboseArrayTranslate") != NULL);
if (verboseArrayTranslate)
{
fprintf(stderr, "arrayTranslateTRTO255: %s @ %s\n",
comp->signature(),
comp->getHotnessName(comp->getMethodHotness())
);
}

// Only children 0, 1 and 4 are evaluated; children 2, 3 and 5 are not needed by the helper.
// NOTE(review): gprClobberEvaluate presumably returns a register that may be overwritten
// without disturbing other users of the child -- confirm against its definition.
TR::Register *inputReg = cg->gprClobberEvaluate(node->getChild(0));
TR::Register *outputReg = cg->gprClobberEvaluate(node->getChild(1));
TR::Register *inputLenReg = cg->gprClobberEvaluate(node->getChild(4));
// Receives the helper result (see the x0 post-condition below).
TR::Register *outputLenReg = cg->allocateRegister();

// Nine post-conditions are added below, so 10 leaves one spare slot.
int numDeps = 10;

// NOTE(review): the constructor arguments are presumably (numPreConditions, numPostConditions, memory) -- confirm.
TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, numDeps, cg->trMemory());

// x0 carries the input pointer into the helper ...
deps->addPreCondition(inputReg, TR::RealRegister::x0);

// ... and carries the result out of it, so x0 is tied to a different virtual register afterwards.
deps->addPostCondition(outputLenReg, TR::RealRegister::x0);
deps->addPostCondition(outputReg, TR::RealRegister::x1);
deps->addPostCondition(inputLenReg, TR::RealRegister::x2);

// Clobbered by the helper
// Each scratch register is pinned through a temporary virtual register that is
// released immediately; it exists only to carry the dependency.
TR::Register *clobberedReg;
deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x4);
cg->stopUsingRegister(clobberedReg);
deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x5);
cg->stopUsingRegister(clobberedReg);
deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x6);
cg->stopUsingRegister(clobberedReg);

// Vector scratch registers v0-v2 are handled the same way.
deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v0);
cg->stopUsingRegister(clobberedReg);
deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v1);
cg->stopUsingRegister(clobberedReg);
deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v2);
cg->stopUsingRegister(clobberedReg);

// Array Translate helper call
// Emitted as a bl to the helper address, with the dependencies above attached to the call.
TR_RuntimeHelper helper = TR_ARM64arrayTranslateTRTO255;
TR::SymbolReference *helperSym = cg->symRefTab()->findOrCreateRuntimeHelper(helper);
uintptr_t addr = reinterpret_cast<uintptr_t>(helperSym->getMethodAddress());
generateImmSymInstruction(cg, TR::InstOpCode::bl, node, addr, deps, helperSym, NULL);

// Every child has its reference count decremented, including children 2, 3 and 5,
// which were never evaluated here.
for (uint32_t i = 0; i < node->getNumChildren(); i++)
cg->decReferenceCount(node->getChild(i));

// Release an operand register only when it differs from the register recorded on
// the child, i.e. when the clobber-evaluate above handed back a separate register.
if (inputReg != node->getChild(0)->getRegister())
cg->stopUsingRegister(inputReg);

if (outputReg != node->getChild(1)->getRegister())
cg->stopUsingRegister(outputReg);

if (inputLenReg != node->getChild(4)->getRegister())
cg->stopUsingRegister(inputLenReg);

// bl overwrites the link register, so record that this method kills it.
cg->machine()->setLinkRegisterKilled(true);
node->setRegister(outputLenReg);
return outputLenReg;
}

TR::Register *
OMR::ARM64::TreeEvaluator::arraysetEvaluator(TR::Node *node, TR::CodeGenerator *cg)
Expand Down
File renamed without changes.
130 changes: 130 additions & 0 deletions compiler/aarch64/runtime/ARM64ArrayTranslate.spp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*******************************************************************************
* Copyright IBM Corp. and others 2024
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
* or the Apache License, Version 2.0 which accompanies this distribution
* and is available at https://www.apache.org/licenses/LICENSE-2.0.
*
* This Source Code may also be made available under the following Secondary
* Licenses when the conditions for such availability set forth in the
* Eclipse Public License, v. 2.0 are satisfied: GNU General Public License,
* version 2 with the GNU Classpath Exception [1] and GNU General Public
* License, version 2 with the OpenJDK Assembly Exception [2].
*
* [1] https://www.gnu.org/software/classpath/license.html
* [2] https://openjdk.org/legal/assembly-exception.html
*
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0
*******************************************************************************/

.file "ARM64ArrayTranslate.s"

#include "aarch64/runtime/arm64asmdefs.inc"

.globl FUNC_LABEL(__arrayTranslateTRTO255)

.text
.align 2

// ----
// arrayTranslateTRTO255
// ----
// TO stands for Two bytes to One byte
//
// Narrows 16-bit input elements to 8-bit output elements and stops at the
// first element whose value does not fit in 8 bits. Equivalent C:
//
// uint16 input[];
// uint8 output[];
// int32 len;
//
// int32 i = 0;
// while (i < len) {
// uint16 ch = input[i];
// if (ch > 0xFF) break;
// output[i] = ch & 0xFF;
// i++;
// }
// return i;
//
// Strategy: translate 16 elements per iteration with SIMD, then handle one
// block of 8, one block of 4, and up to 3 single elements, selected by the
// low bits of len. When a SIMD block contains an element above 0xFF, the
// block is re-scanned one element at a time to find the exact stopping point.
//
// in: x0: input
// x1: output
// x2: len (only the low 32 bits, w2, are read)
// out: x0: num of translated elements
// trash: x4-x6, v0-v2, condition flags
// x1 is left pointing just past the last byte stored

FUNC_LABEL(__arrayTranslateTRTO255):
// preserve output address
// (the result is computed at the end as x1 - x6)
mov x6, x1
// fewer than 16 elements (unsigned compare): skip the 16-element loop
cmp w2, #16
b.cc atTRTO255_15
// w4 = number of 16-element blocks
lsr w4, w2, #4
atTRTO255_16Loop:
// load 16 elements
ldp q0, q1, [x0]
// collect upper 8 bits
uzp2 v2.16b, v0.16b, v1.16b
// fail when any one of them is non-zero
// (pairwise max folds the 128-bit vector into its low 64 bits)
umaxp v2.4s, v2.4s, v2.4s
mov x5, v2.D[0]
// x0 has not been advanced yet, so the fail path re-scans this block from its start
cbnz x5, atTRTO255_Fail
// collect lower 8 bits
uzp1 v2.16b, v0.16b, v1.16b
add x0, x0, #32
subs w4, w4, #1
// store 16 elements
// (str does not alter the flags set by subs above)
str q2, [x1], #16
b.ne atTRTO255_16Loop
atTRTO255_15:
// 15 elements or less remaining
// bit 3 of len selects one block of 8
tst w2, #8
b.eq atTRTO255_7
// load 8 elements
ldr q0, [x0]
// collect upper 8 bits
// (both operands are v0, so every result byte is an upper byte)
trn2 v2.16b, v0.16b, v0.16b
// fail when any one of them is non-zero
umaxp v2.4s, v2.4s, v2.4s
mov x5, v2.D[0]
cbnz x5, atTRTO255_Fail
// collect lower 8 bits
xtn v2.8b, v0.8h
add x0, x0, #16
// store 8 elements
str d2, [x1], #8
atTRTO255_7:
// 7 elements or less remaining
// bit 2 of len selects one block of 4
tst w2, #4
b.eq atTRTO255_3
// load 4 elements
ldr d0, [x0]
// collect upper 8 bits
trn2 v2.8b, v0.8b, v0.8b
// fail when any one of them is non-zero
// (the result is only 64 bits wide, so no pairwise reduction is needed)
mov x5, v2.D[0]
cbnz x5, atTRTO255_Fail
// collect lower 8 bits
xtn v2.8b, v0.8h
add x0, x0, #8
// store 4 elements
str s2, [x1], #4
atTRTO255_3:
// 3 elements or less remaining
// w4 = remaining element count; Z is set when it is zero
ands w4, w2, #3
atTRTO255_1Loop:
// entered with Z from the ands above, or from the subs at the bottom of the loop
b.eq atTRTO255_Done
ldrh w5, [x0], #2
// stop at the first element above 0xFF
cmp w5, #256
b.cs atTRTO255_Done
subs w4, w4, #1
// strb leaves the flags from subs intact for the b.eq at the loop head
strb w5, [x1], #1
b atTRTO255_1Loop
atTRTO255_Fail:
// a SIMD block contained an element above 0xFF: copy elements one by one
// until that element is reached. No counter is needed because the failing
// element is known to lie inside the block being re-scanned.
ldrh w5, [x0], #2
cmp w5, #256
b.cs atTRTO255_Done
strb w5, [x1], #1
b atTRTO255_Fail
atTRTO255_Done:
// number of translated elements
// (output elements are one byte each, so the pointer difference is the count)
sub x0, x1, x6
ret
1 change: 1 addition & 0 deletions compiler/ras/Debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4237,6 +4237,7 @@ TR_Debug::getRuntimeHelperName(int32_t index)
case TR_ARM64interfaceCompleteSlot2: return "_interfaceCompleteSlot2";
case TR_ARM64interfaceSlotsUnavailable: return "_interfaceSlotsUnavailable";
case TR_ARM64PatchGCRHelper: return "_patchGCRHelper" ;
case TR_ARM64arrayTranslateTRTO255: return "__arrayTranslateTRTO255";
}
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion compiler/runtime/Helpers.inc
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,8 @@ SETVAL(TR_ARM64interfaceCompleteSlot2,TR_FSRH+42)
SETVAL(TR_ARM64interfaceSlotsUnavailable,TR_FSRH+43)
SETVAL(TR_ARM64PatchGCRHelper,TR_FSRH+44)
SETVAL(TR_ARM64fieldWatchHelper,TR_FSRH+45)
SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+46)
SETVAL(TR_ARM64arrayTranslateTRTO255,TR_FSRH+46)
SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+47)

SETVAL(TR_S390longDivide,TR_FSRH)
SETVAL(TR_S390interfaceCallHelper,TR_FSRH+1)
Expand Down

0 comments on commit ea32e0b

Please sign in to comment.