From 5e8ada31327c4d2fbfc5cb42d918f305de1adebb Mon Sep 17 00:00:00 2001 From: KONNO Kazuhiro Date: Tue, 12 Nov 2024 17:23:21 +0900 Subject: [PATCH] Stop recognizing UTF16_Encoder.encodeUTF16 methods This commit removes the code for encodeUTF16Big() and encodeUTF16Little() methods in sun.nio.cs.UTF16_Encoder from method recognition, and removes related helper functions. Signed-off-by: KONNO Kazuhiro --- .../share/classes/com/ibm/jit/JITHelpers.java | 4 - runtime/compiler/build/files/host/p.mk | 1 - runtime/compiler/build/files/host/x.mk | 1 - .../codegen/J9RecognizedMethodsEnum.hpp | 5 - runtime/compiler/compile/J9Compilation.cpp | 10 - runtime/compiler/env/j9method.cpp | 5 - .../compiler/p/codegen/J9TreeEvaluator.cpp | 98 ---- runtime/compiler/p/runtime/CMakeLists.txt | 1 - .../compiler/p/runtime/J9PPCEncodeUTF16.spp | 523 ------------------ runtime/compiler/runtime/Runtime.cpp | 14 - .../compiler/x/codegen/J9TreeEvaluator.cpp | 83 --- .../compiler/x/codegen/J9TreeEvaluator.hpp | 1 - runtime/compiler/x/runtime/.gitignore | 1 - runtime/compiler/x/runtime/CMakeLists.txt | 1 - .../compiler/x/runtime/X86EncodeUTF16.nasm | 185 ------- .../compiler/z/codegen/J9CodeGenerator.cpp | 6 +- .../compiler/z/codegen/J9TreeEvaluator.cpp | 355 ------------ .../compiler/z/codegen/J9TreeEvaluator.hpp | 2 - 18 files changed, 1 insertion(+), 1295 deletions(-) delete mode 100644 runtime/compiler/p/runtime/J9PPCEncodeUTF16.spp delete mode 100644 runtime/compiler/x/runtime/X86EncodeUTF16.nasm diff --git a/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java b/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java index 74748ad8209..4280486aaf9 100644 --- a/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java +++ b/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java @@ -65,10 +65,6 @@ private static JITHelpers jitHelpers() { return helpers; } - public native int transformedEncodeUTF16Big(long src, long dest, int num); - - public native int transformedEncodeUTF16Little(long src, long dest, int num); - /* * Constants for getSuperclass. */ diff --git a/runtime/compiler/build/files/host/p.mk b/runtime/compiler/build/files/host/p.mk index 49ada1f34f9..ab846055364 100644 --- a/runtime/compiler/build/files/host/p.mk +++ b/runtime/compiler/build/files/host/p.mk @@ -29,7 +29,6 @@ JIT_PRODUCT_SOURCE_FILES+=\ compiler/p/runtime/J9PPCArrayTranslate.spp \ compiler/p/runtime/J9PPCCRC32.spp \ compiler/p/runtime/J9PPCCRC32_wrapper.c \ - compiler/p/runtime/J9PPCEncodeUTF16.spp \ compiler/p/runtime/Math.spp \ compiler/p/runtime/PPCHWProfiler.cpp \ compiler/p/runtime/PPCRelocationTarget.cpp \ diff --git a/runtime/compiler/build/files/host/x.mk b/runtime/compiler/build/files/host/x.mk index 7a34e1bb5a4..dd60923e066 100644 --- a/runtime/compiler/build/files/host/x.mk +++ b/runtime/compiler/build/files/host/x.mk @@ -26,7 +26,6 @@ JIT_PRODUCT_SOURCE_FILES+=\ compiler/x/runtime/X86RelocationTarget.cpp \ compiler/x/runtime/X86ArrayTranslate.nasm \ compiler/x/runtime/X86Codert.nasm \ - compiler/x/runtime/X86EncodeUTF16.nasm \ compiler/x/runtime/X86LockReservation.nasm \ compiler/x/runtime/X86PicBuilder.nasm \ compiler/x/runtime/X86Unresolveds.nasm diff --git a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp index 193bf181cb8..69401efa41f 100644 --- a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp +++ b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp @@ -514,11 +514,6 @@ sun_nio_cs_UTF_8_Encoder_encodeUTF_8, sun_nio_cs_ext_IBM1388_Encoder_encodeArrayLoop, - sun_nio_cs_UTF_16_Encoder_encodeUTF16Big, - sun_nio_cs_UTF_16_Encoder_encodeUTF16Little, - com_ibm_jit_JITHelpers_transformedEncodeUTF16Big, - com_ibm_jit_JITHelpers_transformedEncodeUTF16Little, - java_lang_Integer_bitCount, java_lang_Integer_highestOneBit, java_lang_Integer_lowestOneBit, diff --git a/runtime/compiler/compile/J9Compilation.cpp b/runtime/compiler/compile/J9Compilation.cpp index d649dcb6a90..d3c5cb62636 100644 --- a/runtime/compiler/compile/J9Compilation.cpp +++ b/runtime/compiler/compile/J9Compilation.cpp @@ -449,8 +449,6 @@ J9::Compilation::isConverterMethod(TR::RecognizedMethod rm) case TR::sun_nio_cs_ext_SBCS_Decoder_decodeSBCS: case TR::sun_nio_cs_UTF_8_Encoder_encodeUTF_8: case TR::sun_nio_cs_UTF_8_Decoder_decodeUTF_8: - case TR::sun_nio_cs_UTF_16_Encoder_encodeUTF16Big: - case TR::sun_nio_cs_UTF_16_Encoder_encodeUTF16Little: return true; default: return false; @@ -497,14 +495,6 @@ J9::Compilation::canTransformConverterMethod(TR::RecognizedMethod rm) case TR::sun_nio_cs_ext_SBCS_Decoder_decodeSBCS: return genTRxx; - // devinmp: I'm not sure whether these could be transformed in AOT, but - // they haven't been so far. - case TR::sun_nio_cs_UTF_16_Encoder_encodeUTF16Little: - return !aot && self()->cg()->getSupportsEncodeUtf16LittleWithSurrogateTest(); - - case TR::sun_nio_cs_UTF_16_Encoder_encodeUTF16Big: - return !aot && self()->cg()->getSupportsEncodeUtf16BigWithSurrogateTest(); - default: return false; } diff --git a/runtime/compiler/env/j9method.cpp b/runtime/compiler/env/j9method.cpp index 238a417be0c..e9722c0b384 100644 --- a/runtime/compiler/env/j9method.cpp +++ b/runtime/compiler/env/j9method.cpp @@ -3192,8 +3192,6 @@ void TR_ResolvedJ9Method::construct() {x(TR::com_ibm_jit_JITHelpers_getPackedDataSizeFromJ9Class64, "getPackedDataSizeFromJ9Class64", "(J)J")}, {x(TR::com_ibm_jit_JITHelpers_getComponentTypeFromJ9Class32, "getComponentTypeFromJ9Class32", "(I)I")}, {x(TR::com_ibm_jit_JITHelpers_getComponentTypeFromJ9Class64, "getComponentTypeFromJ9Class64", "(J)J")}, - {x(TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big, "transformedEncodeUTF16Big", "(JJI)I")}, - {x(TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Little, "transformedEncodeUTF16Little", "(JJI)I")}, {x(TR::com_ibm_jit_JITHelpers_getIntFromObject, "getIntFromObject", "(Ljava/lang/Object;J)I")}, {x(TR::com_ibm_jit_JITHelpers_getIntFromObjectVolatile, "getIntFromObjectVolatile", "(Ljava/lang/Object;J)I")}, {x(TR::com_ibm_jit_JITHelpers_getLongFromObject, "getLongFromObject", "(Ljava/lang/Object;J)J")}, @@ -3781,8 +3779,6 @@ void TR_ResolvedJ9Method::construct() {x(TR::sun_nio_cs_ext_SBCS_Decoder_decodeSBCS, "decodeSBCS", "([BII[CI[C)I")}, {x(TR::sun_nio_cs_UTF_8_Encoder_encodeUTF_8, "encodeUTF_8", "([CII[BI)I")}, {x(TR::sun_nio_cs_UTF_8_Decoder_decodeUTF_8, "decodeUTF_8", "([BII[CI)I")}, - {x(TR::sun_nio_cs_UTF_16_Encoder_encodeUTF16Big, "encodeUTF16Big", "([CII[BI)I")}, - {x(TR::sun_nio_cs_UTF_16_Encoder_encodeUTF16Little, "encodeUTF16Little", "([CII[BI)I")}, { TR::unknownMethod} }; @@ -4191,7 +4187,6 @@ void TR_ResolvedJ9Method::construct() { "java/lang/reflect/Method", MethodMethods }, { "sun/nio/cs/UTF_8$Decoder", EncodeMethods }, { "sun/nio/cs/UTF_8$Encoder", EncodeMethods }, - { "sun/nio/cs/UTF16_Encoder", EncodeMethods }, { "jdk/internal/misc/Unsafe", UnsafeMethods }, { 0 } }; diff --git a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp index 39f12c89c4c..e9575a8a00b 100644 --- a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp @@ -10597,95 +10597,6 @@ static TR::Register *inlineStringHashcode(TR::Node *node, TR::CodeGenerator *cg) return hashReg; } -static TR::Register *inlineEncodeUTF16(TR::Node *node, TR::CodeGenerator *cg) - { - // tree looks like: - // icall com.ibm.jit.JITHelpers.encodeUtf16{Big,Little}() - // input ptr - // output ptr - // input length (in elements) - // Number of elements converted returned - - TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol(); - bool bigEndian = symbol->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big; - - // Set up register dependencies - const int gprClobberCount = 5; - const int fprClobberCount = 4; - const int vrClobberCount = 6; - const int crClobberCount = 2; - const int totalDeps = crClobberCount + gprClobberCount + fprClobberCount + vrClobberCount + 3; - TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, totalDeps, cg->trMemory()); - - TR::Register *inputReg = cg->gprClobberEvaluate(node->getChild(0)); - TR::Register *outputReg = cg->gprClobberEvaluate(node->getChild(1)); - TR::Register *inputLenReg = cg->gprClobberEvaluate(node->getChild(2)); - TR::Register *outputLenReg = cg->allocateRegister(); - - // Allocate clobbered registers - TR::Register *gprClobbers[gprClobberCount], *fprClobbers[fprClobberCount], *vrClobbers[vrClobberCount], *crClobbers[crClobberCount]; - for (int i = 0; i < gprClobberCount; ++i) gprClobbers[i] = cg->allocateRegister(TR_GPR); - for (int i = 0; i < fprClobberCount; ++i) fprClobbers[i] = cg->allocateRegister(TR_FPR); - for (int i = 0; i < vrClobberCount; ++i) vrClobbers[i] = cg->allocateRegister(TR_VRF); - for (int i = 0; i < crClobberCount; ++i) crClobbers[i] = cg->allocateRegister(TR_CCR); - - // Add the pre and post conditions - // Input and output registers - deps->addPreCondition(inputReg, TR::RealRegister::gr3); - - deps->addPostCondition(outputLenReg, TR::RealRegister::gr3); - deps->addPostCondition(outputReg, TR::RealRegister::gr4); - deps->addPostCondition(inputLenReg, TR::RealRegister::gr5); - - //CCR. - deps->addPostCondition(crClobbers[0], TR::RealRegister::cr0); - deps->addPostCondition(crClobbers[1], TR::RealRegister::cr6); - - //GPRs + Trampoline - deps->addPostCondition(gprClobbers[0], TR::RealRegister::gr6); - deps->addPostCondition(gprClobbers[1], TR::RealRegister::gr7); - deps->addPostCondition(gprClobbers[2], TR::RealRegister::gr8); - deps->addPostCondition(gprClobbers[3], TR::RealRegister::gr9); - deps->addPostCondition(gprClobbers[4], TR::RealRegister::gr11); - - //VR's - deps->addPostCondition(vrClobbers[0], TR::RealRegister::vr0); - deps->addPostCondition(vrClobbers[1], TR::RealRegister::vr1); - deps->addPostCondition(vrClobbers[2], TR::RealRegister::vr2); - deps->addPostCondition(vrClobbers[3], TR::RealRegister::vr3); - deps->addPostCondition(vrClobbers[4], TR::RealRegister::vr4); - deps->addPostCondition(vrClobbers[5], TR::RealRegister::vr5); - - //FP/VSR - deps->addPostCondition(fprClobbers[0], TR::RealRegister::fp0); - deps->addPostCondition(fprClobbers[1], TR::RealRegister::fp1); - deps->addPostCondition(fprClobbers[2], TR::RealRegister::fp2); - deps->addPostCondition(fprClobbers[3], TR::RealRegister::fp3); - - // Generate helper call - TR_RuntimeHelper helper; - helper = bigEndian ? TR_PPCencodeUTF16Big : TR_PPCencodeUTF16Little; - TR::SymbolReference *helperSym = cg->comp()->getSymRefTab()->findOrCreateRuntimeHelper(helper); - generateDepImmSymInstruction(cg, TR::InstOpCode::bl, node, (uintptr_t)helperSym->getMethodAddress(), deps, helperSym); - - for (uint32_t i = 0; i < node->getNumChildren(); ++i) cg->decReferenceCount(node->getChild(i)); - - // Spill the clobbered registers - if (inputReg != node->getChild(0)->getRegister()) cg->stopUsingRegister(inputReg); - if (outputReg != node->getChild(1)->getRegister()) cg->stopUsingRegister(outputReg); - if (inputLenReg != node->getChild(2)->getRegister()) cg->stopUsingRegister(inputLenReg); - for (int i = 0; i < gprClobberCount; ++i) cg->stopUsingRegister(gprClobbers[i]); - for (int i = 0; i < vrClobberCount; ++i) cg->stopUsingRegister(vrClobbers[i]); - for (int i = 0; i < fprClobberCount; ++i) cg->stopUsingRegister(fprClobbers[i]); - for (int i = 0; i < crClobberCount; ++i) cg->stopUsingRegister(crClobbers[i]); - - cg->machine()->setLinkRegisterKilled(true); - cg->setHasCall(); - node->setRegister(outputLenReg); - - return outputLenReg; - } - static TR::Register *inlineIntrinsicIndexOf_P10(TR::Node *node, TR::CodeGenerator *cg, bool isLatin1) { static bool disableIndexOfStringIntrinsic = feGetEnv("TR_DisableIndexOfStringIntrinsic") != NULL; @@ -12125,15 +12036,6 @@ J9::Power::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result } break; - case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big: - case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Little: - if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P7) && comp->target().cpu.supportsFeature(OMR_FEATURE_PPC_HAS_VSX)) - { - resultReg = inlineEncodeUTF16(node, cg); - return true; - } - break; - case TR::java_lang_StringLatin1_indexOfChar: case TR::java_lang_StringUTF16_indexOfCharUnsafe: case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1: diff --git a/runtime/compiler/p/runtime/CMakeLists.txt b/runtime/compiler/p/runtime/CMakeLists.txt index 4253cc430aa..e96bb674e29 100644 --- a/runtime/compiler/p/runtime/CMakeLists.txt +++ b/runtime/compiler/p/runtime/CMakeLists.txt @@ -40,7 +40,6 @@ j9jit_files( ${omr_SOURCE_DIR}/compiler/p/runtime/OMRCodeCacheConfig.cpp p/runtime/J9PPCArrayCopy.spp p/runtime/J9PPCArrayTranslate.spp - p/runtime/J9PPCEncodeUTF16.spp p/runtime/J9PPCCRC32.spp p/runtime/J9PPCCRC32_wrapper.c p/runtime/CodeSync.cpp diff --git a/runtime/compiler/p/runtime/J9PPCEncodeUTF16.spp b/runtime/compiler/p/runtime/J9PPCEncodeUTF16.spp deleted file mode 100644 index 0807d2ead51..00000000000 --- a/runtime/compiler/p/runtime/J9PPCEncodeUTF16.spp +++ /dev/null @@ -1,523 +0,0 @@ -!! Copyright IBM Corp. and others 2000 -!! -!! This program and the accompanying materials are made available under -!! the terms of the Eclipse Public License 2.0 which accompanies this -!! distribution and is available at https://www.eclipse.org/legal/epl-2.0/ -!! or the Apache License, Version 2.0 which accompanies this distribution and -!! is available at https://www.apache.org/licenses/LICENSE-2.0. -!! -!! This Source Code may also be made available under the following -!! Secondary Licenses when the conditions for such availability set -!! forth in the Eclipse Public License, v. 2.0 are satisfied: GNU -!! General Public License, version 2 with the GNU Classpath -!! Exception [1] and GNU General Public License, version 2 with the -!! OpenJDK Assembly Exception [2]. -!! -!! [1] https://www.gnu.org/software/classpath/license.html -!! [2] https://openjdk.org/legal/assembly-exception.html -!! -!! SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0 - -#include "j9cfg.h" -#include "jilconsts.inc" -#include "p/runtime/ppcasmdefines.inc" - -#define SURR_MASK_RES_SWAP 0xD8 -#define SURR_MASK_RES 0xD800 - -#ifdef AIXPPC - .globl .__encodeUTF16Big - .globl __encodeUTF16Big{DS} - .globl .__encodeUTF16Little - .globl __encodeUTF16Little{DS} - -#elif defined(LINUXPPC64) - .globl FUNC_LABEL(__encodeUTF16Big) - .type FUNC_LABEL(__encodeUTF16Big),@function - .globl FUNC_LABEL(__encodeUTF16Little) - .type FUNC_LABEL(__encodeUTF16Little),@function - -#elif defined(LINUX) || defined(NEUTRINO) - .globl __encodeUTF16Big - .globl __encodeUTF16Little -#endif - -#ifdef AIXPPC -! .text section - .align 5 - .csect Utf16ToByte_TEXT{PR} -#elif defined(LINUXPPC64) - .section ".text" - .align 5 -#else - .align 5 -#endif - -!------------------------------------------------------------------------------- -! entry encodeUTF16Big -!------------------------------------------------------------------------------- -! The conversion process copies 2-byte UTF16 characters to the destination -! unless a surrogate pair is encountered, when the process stops. A surrogate -! pair is a pair of UTF16 code points in range U+10000 - U+10FFF, represented by -! a sequence of 4 bytes in range 0xD800 - 0xDBFF and 0xDC00 - 0xDFFF (2 bytes -! per respective code point). The surrogate pair can appear swapped in a text -! stream or missing one of the code points. We can generalize the test for it -! by masking a code points with 0xF800 and checking if the result is equal to -! 0xD800. -! The conversion process is implemented as follows: -! -! size_t i = 0; -! while (i < n) { -! uint16_t u16 = *(src + i); -! -! // surrogate check -! if ((u16 & 0xF800) == 0xD800) break; -! -! *(dest++) = u16; -! ++i; -! } -! -! return i; - -#ifdef AIXPPC -.__encodeUTF16Big: - .function .__encodeUTF16Big,startproc.__encodeUTF16Big,16,0,(endproc.__encodeUTF16Big-startproc.__encodeUTF16Big) - .machine "push" - .machine "pwr7" -#elif defined(LINUXPPC64) -FUNC_LABEL(__encodeUTF16Big): -#else -__encodeUTF16Big: -#endif - -!---------------------------------------------------------------------- -! input: -! r3 = input ptr -! r4 = output ptr -! r5 = num_elements -! output: -! r3 = number_elements_processed -! r4 = input ptr -! clobbered: -! r3 = input ptr -! r4 = output ptr -! r5 = element to process count -! r6 = elements to process in vec loop -! r7 = elements to process in residue loop -! r8 = temp storage area -! r9 = result of masking code points with 0xF800 -! r11 = 0xD800 mask result for half word reads -! vr0 = 0xF800 mask for the vector reads -! vr1 = 0xD800 mask result for vector reads -! vr2 = input half words 0 - 7 -! vr3 = input half words 8 - 15 -! vr4 = temp mask generation reg -! vr5 = half word rotate amount register for endian correction (0008) -! fp0-3 = output registers (using FP registers to perform 16-byte misaligned stores) -! cr0 = all sorts of condition checks - - startproc.__encodeUTF16Big: - -#ifdef STANDALONE - ! Prologue - stdu 1, -112(1) - mflr r0 - std r0, 128(1) - std r4, 160(1) -#else - staddr r4, -ALen(J9SP) ! preserve start of output buffer for number of elements processed calculation -#endif - - cmpi cr0, 0, r5, 0 ! done if no elements to process - beq .L.__done_big - li r11, 0 ! load surrogate masking result - -#if !defined(__LITTLE_ENDIAN__) - ori r11, r11, SURR_MASK_RES -#else - ori r11, r11, SURR_MASK_RES_SWAP -#endif - -.L.__align_loop_big: - andi. r8, r3, 0xF ! bits in 0xf? - beq cr0, .L.__main_big ! 16 byte aligned if clear -#if !defined(__LITTLE_ENDIAN__) - lhz r8, 0(r3) ! load code pt - rlwinm r9, r8, 0, 16, 20 ! mask out equivalent of 0xF800 from code point -#else - lhbrx r8, 0, r3 - rlwinm r9, r8, 0, 24, 28 ! mask out equivalent of 0xF800 from code point -#endif - cmpw cr0, r9, r11 ! surrogate if == 0xD800 - beq cr0, .L.__done_big ! skip processing if surrogate - sth r8, 0(r4) ! store hb at dest - addi r4, r4, 2 - addi r3, r3, 2 - addi r5, r5, -1 - cmpi cr0, 0, r5, 0 - beq .L.__done_big ! the count reached zero before alignment - b .L.__align_loop_big -.L.__main_big: - cmpi cr0, 0, r5, 16 ! if we have less than 16 items to process, jump into the alignment residue loop - ble cr0, .L.__alignResidue_big - rlwinm r6, r5, 32-4, 4, 31 ! r6 = n / 16, processed in vector loop - mtctr r6 - rlwinm r7, r5, 0, 28, 31 ! r7 = n % 16, processed in residue loop - - ! set up HB and LB permute masks - vspltisb vr0, -8 ! vr0: F8F8F8F8F8F8F8F8F8F8F8F8F8F8F8F8 - vspltisb vr4, 8 ! vr4: 08080808080808080808080808080808 - vslh vr0, vr0, vr4 ! vr0: F800F800F800F800F800F800F800F800 - li r8, 0xD ! r8: 0xD - lvsl vr1, 0, r8 ! vr1: 0D0E0F101112131415161718191A1B1C - vspltisb vr4, 4 ! vr4: 04040404040404040404040404040404 - vslb vr1, vr1, vr4 ! vr1: D0E0F101112131415161718191A1B1C0 - li r8, 0x8 ! r8: 0x8 - lvsl vr4, 0, r8 ! vr4: 08090A0B0C0D0E0F1011121314151617 - vaddubs vr1, vr1, vr4 ! vr1: D8E9FB0C1D2E3F5061728394A5B6C7D7 - vspltb vr1, vr1, 0 ! vr1: D8D8D8D8D8D8D8D8D8D8D8D8D8D8D8D8 - vspltisb vr4, 8 ! vr4: 08080808080808080808080808080808 - vslh vr1, vr1, vr4 ! vr1: D800D800D800D800D800D800D800D800 - li r8, 16 ! r8: input/output ptr increment - -#if defined(__LITTLE_ENDIAN__) - vspltish vr5, 8 ! vr5: 00080008000800080008000800080008 -#endif - -.L.__vectorLoop_big: - lvx vr2, 0, r3 ! vr2: hw 0 - 7 - lvx vr3, r3, r8 ! vr3: hw 8 - 15 - vand vr4, vr2, vr0 ! vr4: high bytes masked with F8 - vcmpequh_r vr4, vr4, vr1 ! vr4: all 0 if no HB & F8 == D8 - bne cr6, .L.__alignResidue_big ! skip to residue processing if we have a surrogate - vand vr4, vr3, vr0 ! vr4: high bytes masked with F8 - vcmpequh_r vr4, vr4, vr1 ! vr4: all 0 if no HB & F8 == D8 - bne cr6, .L.__alignResidue_big ! skip to residue processing if we have a surrogate - -#if defined(__LITTLE_ENDIAN__) - vrlh vr2, vr2, vr5 ! vr2: hw 0 - 7 byte-swapped - vrlh vr3, vr3, vr5 ! vr3: hw 8 - 15 byte-swapped - xxlor vs1, vs34, vs34 ! least significant double word of vr2 - xxpermdi vs0, vs34, vs34, 2 ! most significant double word of vr2 -#else - xxlor vs0, vs34, vs34 ! most significant double word of vr2 - xxpermdi vs1, vs34, vs34, 2 ! least significant double word of vr2 -#endif - stfd fp0, 0(r4) ! hw 0 - 7 stored at the output ptr - stfd fp1, 8(r4) ! hw 8 - 15 stored at the output ptr -#if defined(__LITTLE_ENDIAN__) - xxlor vs3, vs35, vs35 ! least significant double word of vr3 - xxpermdi vs2, vs35, vs35, 2 ! most significant double word of vr3 -#else - xxlor vs2, vs35, vs35 ! most significant double word of vr3 - xxpermdi vs3, vs35, vs35, 2 ! least significant double word of vr3 -#endif - stfd fp2, 16(r4) ! hw 16 - 23 stored at the output ptr - stfd fp3, 24(r4) ! hw 16 - 23 stored at the output ptr - addi r3, r3, 32 ! bump input ptr - addi r4, r4, 32 ! bump output ptr - subi r5, r5, 16 ! decrement the element count - bdnz .L.__vectorLoop_big - cmpi cr0, 0, r5, 0 ! check for work to be done in residue loop - beq cr0, .L.__done_big ! nothing to do, return - -! residue work starts here -.L.__alignResidue_big: - mtctr r5 ! move count of remaining elements into ctr - addi r3, r3, -2 ! dec. input ptr so we use lhzu instruction to load - addi r4, r4, -2 ! ditto for the output ptr. - -.L.__residueLoop_big: -#if !defined(__LITTLE_ENDIAN__) - lhzu r8, 2(r3) ! load utf16 code point and bump src - rlwinm r9, r8, 0, 16, 20 ! repeat test against surrogate mask -#else - addi r3, r3, 2 - lhbrx r8, 0, r3 - rlwinm r9, r8, 0, 24, 28 ! repeat test against surrogate mask -#endif - cmpw cr0, r9, r11 - beq cr0, .L.__doneResidue_big ! stop copying if we have a surrogate - sthu r8, 2(r4) - addi r5, r5, -1 - bdnz .L.__residueLoop_big - -.L.__doneResidue_big: - addi r4, r4, 2 ! inc. output ptr so we can use it for calculating number of elements processed - -.L.__done_big: -#ifdef STANDALONE - ld r8, 160(1) -#else - laddr r8, -ALen(J9SP) -#endif - sub r3, r4, r8 ! r3 = output_ptr - output_base_ptr = 2x num elements processed - srwi r3, r3, 1 ! divide diff by 2 to obtain num elements processed - -#ifdef STANDALONE - ! Prologue - ld 0, 128(1) - mtlr 0 - ld 1, 0(1) -#endif - blr - -#ifdef AIXPPC - .machine "pop" -#endif - endproc.__encodeUTF16Big: - - -!------------------------------------------------------------------------------- -! entry encodeUTF16Little -!------------------------------------------------------------------------------- -! The conversion process copies 2-byte UTF16 characters to the destination -! unless a surrogate pair is encountered, when the process stops. A surrogate -! pair is a pair of UTF16 code points in range U+10000 - U+10FFF, represented by -! a sequence of 4 bytes in range 0xD800 - 0xDBFF and 0xDC00 - 0xDFFF (2 bytes -! per respective code point). The surrogate pair can appear swapped in a text -! stream or missing one of the code points. We can generalize the test for it -! by masking a code points with 0xF800 and checking if the result is equal to -! 0xD800. -! The conversion process is implemented as follows: -! -! size_t i = 0; -! while (i < n) { -! uint16_t u16 = *(src + i); -! -! // surrogate check -! if ((u16 & 0xF800) == 0xD800) break; -! -! *(dest++) = u16; -! ++i; -! } -! -! return i; - -#ifdef AIXPPC -.__encodeUTF16Little: - .function .__encodeUTF16Little,startproc.__encodeUTF16Little,16,0,(endproc.__encodeUTF16Little-startproc.__encodeUTF16Little) - .machine "push" - .machine "pwr7" -#elif defined(LINUXPPC64) -FUNC_LABEL(__encodeUTF16Little): -#else -__encodeUTF16Little: -#endif - -!---------------------------------------------------------------------- -! input: -! r3 = input ptr -! r4 = output ptr -! r5 = num_elements -! output: -! r3 = number_elements_processed -! clobbered: -! r3 = output ptr -! r4 = input ptr -! r5 = element to process count -! r6 = elements to process in vec loop -! r7 = elements to process in residue loop -! r8 = temp storage area -! r9 = result of masking code points with 0xF800 -! r11 = 0xD800 mask result for half word reads -! vr0 = 0xF800 mask for the vector reads -! vr1 = 0xD800 mask result for vector reads -! vr2 = input half words 0 - 7 -! vr3 = input half words 8 - 15 -! vr4 = temp mask generation reg -! vr5 = half word rotate amount register for endian correction (0008) -! fp0-3 = output registers (using FP registers to perform 16-byte misaligned stores) -! cr0 = all sorts of condition checks - - startproc.__encodeUTF16Little: - -#ifdef STANDALONE - ! Prologue - stdu 1, -112(1) - mflr r0 - std r0, 128(1) - std r4, 160(1) -#else - staddr r4, -ALen(J9SP) ! preserve start of output buffer for number of elements processed calculation -#endif - - cmpi cr0, 0, r5, 0 ! done if no elements to process - beq .L.__done - li r11, 0 ! load surrogate masking result - -#if defined(__LITTLE_ENDIAN__) - ori r11, r11, SURR_MASK_RES -#else - ori r11, r11, SURR_MASK_RES_SWAP -#endif - -.L.__align_loop: - andi. r8, r3, 0xF ! bits in 0xf? - beq cr0, .L.__main ! 16 byte aligned if clear - -#if defined(__LITTLE_ENDIAN__) - lhz r8, 0(r3) ! load code pt - rlwinm r9, r8, 0, 16, 20 ! mask out equivalent of 0xF800 from code point -#else - lhbrx r8, r0, r3 - rlwinm r9, r8, 0, 24, 28 ! mask out equivalent of 0xF8 from code point -#endif - - cmpw cr0, r9, r11 ! surrogate if == 0xD8 - beq cr0, .L.__done ! skip processing if surrogate - sth r8, 0(r4) ! store hb at dest - addi r3, r3, 2 - addi r4, r4, 2 - addi r5, r5, -1 - cmpi cr0, 0, r5, 0 - beq .L.__done ! the count reached zero before alignment - b .L.__align_loop -.L.__main: - cmpi cr0, 0, r5, 16 ! if we have less than 16 items to process, jump into the alignment residue loop - ble cr0, .L.__alignResidue - rlwinm r6, r5, 32-4, 4, 31 ! r6 = n / 16, processed in vector loop - mtctr r6 - rlwinm r7, r5, 0, 28, 31 ! r7 = n % 16, processed in residue loop - ! set up HB and LB permute masks - vspltisb vr0, -8 ! vr0: F8F8F8F8F8F8F8F8F8F8F8F8F8F8F8F8 - vspltisb vr4, 8 ! vr4: 08080808080808080808080808080808 - vslh vr0, vr0, vr4 ! vr0: F800F800F800F800F800F800F800F800 - li r8, 0xD ! r8: 0xD - lvsl vr1, 0, r8 ! vr1: 0D0E0F101112131415161718191A1B1C - vspltisb vr4, 4 ! vr4: 04040404040404040404040404040404 - vslb vr1, vr1, vr4 ! vr1: D0E0F101112131415161718191A1B1C0 - li r8, 0x8 ! r8: 0x8 - lvsl vr4, 0, r8 ! vr4: 08090A0B0C0D0E0F1011121314151617 - vaddubs vr1, vr1, vr4 ! vr1: D8E9FB0C1D2E3F5061728394A5B6C7D7 - vspltb vr1, vr1, 0 ! vr1: D8D8D8D8D8D8D8D8D8D8D8D8D8D8D8D8 - vspltisb vr4, 8 ! vr4: 08080808080808080808080808080808 - vslh vr1, vr1, vr4 ! vr1: D800D800D800D800D800D800D800D800 - -#if !defined(__LITTLE_ENDIAN__) - vspltish vr5, 8 ! vr5: 00080008000800080008000800080008 -#endif - - li r8, 16 ! r8: input/output ptr increment -.L.__vectorLoop: - lvx vr2, 0, r3 ! vr2: hw 0 - 7 - lvx vr3, r3, r8 ! vr3: hw 8 - 15 - vand vr4, vr2, vr0 ! vr4: high bytes masked with F8 - vcmpequh_r vr4, vr4, vr1 ! vr4: all 0 if no HB & F8 == D8 - bne cr6, .L.__alignResidue ! skip to residue processing if we have a surrogate - vand vr4, vr3, vr0 ! vr4: high bytes masked with F8 - vcmpequh_r vr4, vr4, vr1 ! vr4: all 0 if no HB & F8 == D8 - bne cr6, .L.__alignResidue ! skip to residue processing if we have a surrogate - -#if !defined(__LITTLE_ENDIAN__) - vrlh vr2, vr2, vr5 ! vr2: hw 0 - 7 byte-swapped - vrlh vr3, vr3, vr5 ! vr3: hw 8 - 15 byte-swapped - xxlor vs1, vs34, vs34 ! MSDW of vr2 - xxpermdi vs0, vs34, vs34, 2 -#else - xxlor vs0, vs34, vs34 ! LSDW of vr2 - xxpermdi vs1, vs34, vs34, 2 -#endif - stfd fp0, 0(r4) ! hw 0 - 7 stored at the output ptr - stfd fp1, 8(r4) ! hw 8 - 15 stored at the output ptr - -#if !defined(__LITTLE_ENDIAN__) - xxlor vs3, vs35, vs35 ! MSDW of vr3 - xxpermdi vs2, vs35, vs35, 2 ! LSDW of vr3 -#else - xxlor vs2, vs35, vs35 ! LSDW of vr3 - xxpermdi vs3, vs35, vs35, 2 ! MSDW of vr3 -#endif - stfd fp2, 16(r4) ! hw 16 - 23 stored at the output ptr - stfd fp3, 24(r4) ! hw 16 - 23 stored at the output ptr - - addi r3, r3, 32 ! bump output ptr - addi r4, r4, 32 ! bump input ptr - subi r5, r5, 16 ! decrement the element count - bdnz .L.__vectorLoop - cmpi cr0, 0, r5, 0 ! check for work to be done in residue loop - beq cr0, .L.__done ! nothing to do, return - -! residue work starts here -.L.__alignResidue: - mtctr r5 ! move count of remaining elements into ctr - addi r3, r3, -2 ! dec. input ptr so we use lhzu instruction to load - addi r4, r4, -2 ! ditto for the output ptr. - -.L.__residueLoop: -#if defined(__LITTLE_ENDIAN__) - lhzu r8, 2(r3) ! load utf16 code point and bump src - rlwinm r9, r8, 0, 16, 20 ! repeat test against surrogate mask -#else - addi r3, r3, 2 - lhbrx r8, r0, r3 - rlwinm r9, r8, 0, 24, 28 ! repeat test against surrogate mask -#endif - - cmpw cr0, r9, r11 - beq cr0, .L.__doneResidue ! stop copying if we have a surrogate - sthu r8, 2(r4) ! store lw at dest - addi r5, r5, -1 - bdnz .L.__residueLoop - -.L.__doneResidue: - addi r4, r4, 2 ! inc. output ptr so we can use it for calculating number of elements processed - -.L.__done: -#ifdef STANDALONE - ld r8, 160(1) -#else - laddr r8, -ALen(J9SP) -#endif - sub r3, r4, r8 ! r3 = output_ptr - output_base_ptr = 2x num elements processed - srwi r3, r3, 1 ! divide diff by 2 to obtain num elements processed - -#ifdef STANDALONE - ! Prologue - ld 0, 128(1) - mtlr 0 - ld 1, 0(1) -#endif - blr - -#ifdef AIXPPC - .machine "pop" -#endif - endproc.__encodeUTF16Little: - -! .data section -#ifdef AIXPPC - .toc - .csect __encodeUTF16Big{DS} - ADDR .__encodeUTF16Big - ADDR TOC{TC0} - ADDR 0x00000000 -! End csect __encodeUTF16Big{DS} - - .csect __encodeUTF16Little{DS} - ADDR .__encodeUTF16Little - ADDR TOC{TC0} - ADDR 0x00000000 -! End csect __encodeUTF16Little{DS} - -#elif defined(LINUXPPC64) - .section ".toc" -#if !defined(__LITTLE_ENDIAN__) - .section ".opd","aw" - .align 3 - .globl __encodeUTF16Big - .size __encodeUTF16Big,24 -__encodeUTF16Big: - .quad .__encodeUTF16Big - .quad .TOC.@tocbase - .long 0x00000000 - .long 0x00000000 - .globl __encodeUTF16Little - .size __encodeUTF16Little,24 -__encodeUTF16Little: - .quad .__encodeUTF16Little - .quad .TOC.@tocbase - .long 0x00000000 - .long 0x00000000 -#endif -#endif diff --git a/runtime/compiler/runtime/Runtime.cpp b/runtime/compiler/runtime/Runtime.cpp index af74dbde33b..cfd735467fc 100644 --- a/runtime/compiler/runtime/Runtime.cpp +++ b/runtime/compiler/runtime/Runtime.cpp @@ -342,9 +342,6 @@ JIT_HELPER(icallVMprJavaSendVirtualL); JIT_HELPER(icallVMprJavaSendVirtualF); JIT_HELPER(icallVMprJavaSendVirtualD); -JIT_HELPER(encodeUTF16Big); -JIT_HELPER(encodeUTF16Little); - #ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION JIT_HELPER(doAESENCEncrypt); JIT_HELPER(doAESENCDecrypt); @@ -369,9 +366,6 @@ JIT_HELPER(SSEfloatRemainderIA32Thunk); JIT_HELPER(SSEdoubleRemainderIA32Thunk); JIT_HELPER(SSEdouble2LongIA32); -JIT_HELPER(encodeUTF16Big); -JIT_HELPER(encodeUTF16Little); - JIT_HELPER(SMPVPicInit); #endif /* TR_HOST_64BIT */ @@ -438,8 +432,6 @@ JIT_HELPER(__forwardWordArrayCopy_dp); JIT_HELPER(__forwardHalfWordArrayCopy_dp); JIT_HELPER(__referenceArrayCopy); JIT_HELPER(__generalArrayCopy); -JIT_HELPER(__encodeUTF16Big); -JIT_HELPER(__encodeUTF16Little); JIT_HELPER(__quadWordArrayCopy_vsx); JIT_HELPER(__forwardQuadWordArrayCopy_vsx); @@ -1225,8 +1217,6 @@ void initializeCodeRuntimeHelperTable(J9JITConfig *jitConfig, char isSMP) SET(TR_AMD64arrayTranslateTRTO, (void *)arrayTranslateTRTO, TR_Helper); SET(TR_AMD64arrayTranslateTROTNoBreak, (void *)arrayTranslateTROTNoBreak, TR_Helper); SET(TR_AMD64arrayTranslateTROT, (void *)arrayTranslateTROT, TR_Helper); - SET(TR_AMD64encodeUTF16Big, (void *)encodeUTF16Big, TR_Helper); - SET(TR_AMD64encodeUTF16Little, (void *)encodeUTF16Little, TR_Helper); #ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION SET(TR_AMD64doAESENCEncrypt, (void *)doAESENCEncrypt, TR_Helper); SET(TR_AMD64doAESENCDecrypt, (void *)doAESENCDecrypt, TR_Helper); @@ -1273,8 +1263,6 @@ void initializeCodeRuntimeHelperTable(J9JITConfig *jitConfig, char isSMP) SET(TR_IA32arrayTranslateTRTO, (void *)arrayTranslateTRTO, TR_Helper); SET(TR_IA32arrayTranslateTROTNoBreak, (void *)arrayTranslateTROTNoBreak, TR_Helper); SET(TR_IA32arrayTranslateTROT, (void *)arrayTranslateTROT, TR_Helper); - SET(TR_IA32encodeUTF16Big, (void *)encodeUTF16Big, TR_Helper); - SET(TR_IA32encodeUTF16Little, (void *)encodeUTF16Little, TR_Helper); SET(TR_jitAddPicToPatchOnClassUnload, (void *)jitAddPicToPatchOnClassUnload, TR_Helper); @@ -1390,8 +1378,6 @@ void initializeCodeRuntimeHelperTable(J9JITConfig *jitConfig, char isSMP) SET(TR_PPCarrayTranslateTRTO255, (void *) __arrayTranslateTRTO255, TR_Helper); SET(TR_PPCarrayTranslateTROT255, (void *) __arrayTranslateTROT255, TR_Helper); SET(TR_PPCarrayTranslateTROT, (void *) __arrayTranslateTROT, TR_Helper); - SET(TR_PPCencodeUTF16Big, (void *) __encodeUTF16Big, TR_Helper); - SET(TR_PPCencodeUTF16Little, (void *) __encodeUTF16Little, TR_Helper); #elif defined(TR_HOST_ARM) SET(TR_ARMdouble2Long, (void *) __double2Long, TR_Helper); diff --git a/runtime/compiler/x/codegen/J9TreeEvaluator.cpp b/runtime/compiler/x/codegen/J9TreeEvaluator.cpp index 5c2b2506903..c0e81973150 100644 --- a/runtime/compiler/x/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/x/codegen/J9TreeEvaluator.cpp @@ -11661,10 +11661,6 @@ J9::X86::TreeEvaluator::directCallEvaluator(TR::Node *node, TR::CodeGenerator *c return inlineIntrinsicIndexOf(node, cg, false); break; - case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big: - case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Little: - return TR::TreeEvaluator::encodeUTF16Evaluator(node, cg); - case TR::java_lang_String_hashCodeImplDecompressed: if (cg->getSupportsInlineStringHashCode()) returnRegister = inlineStringHashCode(node, false, cg); @@ -11978,85 +11974,6 @@ J9::X86::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenera return NULL; } -TR::Register * -J9::X86::TreeEvaluator::encodeUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg) - { - // tree looks like: - // icall com.ibm.jit.JITHelpers.encodeUTF16{Big,Little}() - // input ptr - // output ptr - // input length (in elements) - // Number of elements translated is returned - - TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol(); - bool bigEndian = symbol->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big; - - // Set up register dependencies - const int gprClobberCount = 2; - const int maxFprClobberCount = 5; - const int fprClobberCount = bigEndian ? 5 : 4; // xmm4 only needed for big-endian - TR::Register *srcPtrReg, *dstPtrReg, *lengthReg, *resultReg; - TR::Register *gprClobbers[gprClobberCount], *fprClobbers[maxFprClobberCount]; - bool killSrc = TR::TreeEvaluator::stopUsingCopyRegAddr(node->getChild(0), srcPtrReg, cg); - bool killDst = TR::TreeEvaluator::stopUsingCopyRegAddr(node->getChild(1), dstPtrReg, cg); - bool killLen = TR::TreeEvaluator::stopUsingCopyRegInteger(node->getChild(2), lengthReg, cg); - resultReg = cg->allocateRegister(); - for (int i = 0; i < gprClobberCount; i++) - gprClobbers[i] = cg->allocateRegister(); - for (int i = 0; i < fprClobberCount; i++) - fprClobbers[i] = cg->allocateRegister(TR_FPR); - - int depCount = 11; - TR::RegisterDependencyConditions *deps = - generateRegisterDependencyConditions((uint8_t)0, depCount, cg); - - deps->addPostCondition(srcPtrReg, TR::RealRegister::esi, cg); - deps->addPostCondition(dstPtrReg, TR::RealRegister::edi, cg); - deps->addPostCondition(lengthReg, TR::RealRegister::edx, cg); - deps->addPostCondition(resultReg, TR::RealRegister::eax, cg); - - deps->addPostCondition(gprClobbers[0], TR::RealRegister::ecx, cg); - deps->addPostCondition(gprClobbers[1], TR::RealRegister::ebx, cg); - - deps->addPostCondition(fprClobbers[0], TR::RealRegister::xmm0, cg); - deps->addPostCondition(fprClobbers[1], TR::RealRegister::xmm1, cg); - deps->addPostCondition(fprClobbers[2], TR::RealRegister::xmm2, cg); - deps->addPostCondition(fprClobbers[3], TR::RealRegister::xmm3, cg); - if (bigEndian) - deps->addPostCondition(fprClobbers[4], TR::RealRegister::xmm4, cg); - - deps->stopAddingConditions(); - - // Generate helper call - TR_RuntimeHelper helper; - if (cg->comp()->target().is64Bit()) - helper = bigEndian ? TR_AMD64encodeUTF16Big : TR_AMD64encodeUTF16Little; - else - helper = bigEndian ? TR_IA32encodeUTF16Big : TR_IA32encodeUTF16Little; - - generateHelperCallInstruction(node, helper, deps, cg); - - // Free up registers - for (int i = 0; i < gprClobberCount; i++) - cg->stopUsingRegister(gprClobbers[i]); - for (int i = 0; i < fprClobberCount; i++) - cg->stopUsingRegister(fprClobbers[i]); - - for (uint16_t i = 0; i < node->getNumChildren(); i++) - cg->decReferenceCount(node->getChild(i)); - - TR_LiveRegisters *liveRegs = cg->getLiveRegisters(TR_GPR); - if (killSrc) - liveRegs->registerIsDead(srcPtrReg); - if (killDst) - liveRegs->registerIsDead(dstPtrReg); - if (killLen) - liveRegs->registerIsDead(lengthReg); - - node->setRegister(resultReg); - return resultReg; - } - /* * The CaseConversionManager is used to store info about the conversion. It defines the lower bound and upper bound value depending on diff --git a/runtime/compiler/x/codegen/J9TreeEvaluator.hpp b/runtime/compiler/x/codegen/J9TreeEvaluator.hpp index 9a34a6bbeae..b8f0fe942d1 100644 --- a/runtime/compiler/x/codegen/J9TreeEvaluator.hpp +++ b/runtime/compiler/x/codegen/J9TreeEvaluator.hpp @@ -132,7 +132,6 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator */ static void generateFillInDataBlockSequenceForUnresolvedField (TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *dataSnippetRegister); static TR::Register *directCallEvaluator(TR::Node *node, TR::CodeGenerator *cg); - static TR::Register *encodeUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *toUpperIntrinsicUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *toLowerIntrinsicUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *toUpperIntrinsicLatin1Evaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/runtime/compiler/x/runtime/.gitignore b/runtime/compiler/x/runtime/.gitignore index 62fa885b13a..a0a30d216f4 100644 --- a/runtime/compiler/x/runtime/.gitignore +++ b/runtime/compiler/x/runtime/.gitignore @@ -25,4 +25,3 @@ /X86Unresolveds.s /X86Unresolveds.asm /X86Crypto.s -/X86EncodeUTF16.s diff --git a/runtime/compiler/x/runtime/CMakeLists.txt b/runtime/compiler/x/runtime/CMakeLists.txt index a80b59adefe..295618c213e 100644 --- a/runtime/compiler/x/runtime/CMakeLists.txt +++ b/runtime/compiler/x/runtime/CMakeLists.txt @@ -25,7 +25,6 @@ j9jit_files( x/runtime/Recomp.cpp x/runtime/X86ArrayTranslate.nasm x/runtime/X86Codert.nasm - x/runtime/X86EncodeUTF16.nasm x/runtime/X86LockReservation.nasm x/runtime/X86PicBuilder.nasm x/runtime/X86RelocationTarget.cpp diff --git a/runtime/compiler/x/runtime/X86EncodeUTF16.nasm b/runtime/compiler/x/runtime/X86EncodeUTF16.nasm deleted file mode 100644 index 8095f5faf67..00000000000 --- a/runtime/compiler/x/runtime/X86EncodeUTF16.nasm +++ /dev/null @@ -1,185 +0,0 @@ -; Copyright IBM Corp. and others 2014 -; -; This program and the accompanying materials are made available under -; the terms of the Eclipse Public License 2.0 which accompanies this -; distribution and is available at https://www.eclipse.org/legal/epl-2.0/ -; or the Apache License, Version 2.0 which accompanies this distribution and -; is available at https://www.apache.org/licenses/LICENSE-2.0. -; -; This Source Code may also be made available under the following -; Secondary Licenses when the conditions for such availability set -; forth in the Eclipse Public License, v. 2.0 are satisfied: GNU -; General Public License, version 2 with the GNU Classpath -; Exception [1] and GNU General Public License, version 2 with the -; OpenJDK Assembly Exception [2]. -; -; [1] https://www.gnu.org/software/classpath/license.html -; [2] https://openjdk.org/legal/assembly-exception.html -; -; SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0 - - SURROGATE_MASK equ 0f800h - SURROGATE_MASK32 equ 0f800f800h - SURROGATE_BITS equ 0d800h - SURROGATE_BITS32 equ 0d800d800h - SSE_MIN_CHARS equ 32 - -%include "jilconsts.inc" - -segment .text - - DECLARE_GLOBAL encodeUTF16Big - DECLARE_GLOBAL encodeUTF16Little - - align 16 -encodeUTF16Big_shufmask: - dq 0607040502030001h - dq 0e0f0c0d0a0b0809h - -%macro DefineUTF16EncodeHelper 2 ; args: helperName, bigEndian -; UTF16 encoding for BMP characters -; pseudocode(uint8_t *dest, uint16_t *src, int n): -; { -; for (int i = 0; i < n; i++) -; { -; uint16_t c = src[i]; -; if ((c & SURROGATE_MASK) == SURROGATE_BITS) break; -; #if bigEndian -; *dest++ = (uint8_t)(c >> 8); -; *dest++ = (uint8_t)(c & 0xff); -; #else -; *dest++ = (uint8_t)(c & 0xff); -; *dest++ = (uint8_t)(c >> 8); -; #endif -; } -; return i; -; } - -; NB. c is a surrogate code unit -; iff SURROGATE_MIN = 0xd800 <= c <= 0xdfff = SURROGATE_MAX -; iff (c & SURROGATE_MASK) == SURROGATE_BITS, -; where SURROGATE_MASK = 0xf800, SURROGATE_BITS = 0xd800 - -; registers: -; _rdi dest ptr (into byte array) -; _rsi src ptr (into char array) -; _rdx n -; [_r]cx c (one-at-a-time); tmp when using SSE -; bx c & SURROGATE_MASK (one-at-a-time) -; _rax original n / return value -; xmm0 constant SURROGATE_MASK vector (0xf800..f800) -; xmm1 constant SURROGATE_BITS vector (0xd800..d800) -; xmm2 current 8 characters (8-at-a-time) -; xmm3 surrogate bitmask -; xmm4 byte shuffle mask (big-endian only) - - align 16 -%1: ; helperName - ; Remember original count - - ; will subtract at return to compute number converted - mov _rax, _rdx - cmp _rdx, 0 - je Lend_%1 ; helpername - sub _rdi, _rsi ; relative to _rsi, only advance _rsi - cmp _rdx, SSE_MIN_CHARS - jl Lresidue_loop_%1 ; helperName - -Lprealign_%1: ; helperName - test _rsi, 0fh - jz Laligned16_%1 ; helperName - - mov cx, word [_rsi] - - ; return if surrogate - mov bx, cx - and bx, SURROGATE_MASK - cmp bx, SURROGATE_BITS - je Lend_%1 ; helperName - - ; not surrogate -%if %2 ;bigEndian - xchg cl, ch -%endif - mov word [_rsi + _rdi], cx - add _rsi, 2 - dec _rdx - jg Lprealign_%1 ; helperName - jmp Lend_%1 ; helperName - -Laligned16_%1: ; helperName - sub _rdx, 8 - jl Lresidue_%1 ; helperName - - ; initialize constant vectors: - ; SURROGATE_MASK - mov ecx, SURROGATE_MASK32 - movd xmm0, ecx - pshufd xmm0, xmm0, 0 - - ; SURROGATE_BITS - mov ecx, SURROGATE_BITS32 - movd xmm1, ecx - pshufd xmm1, xmm1, 0 - -%if %2 ;&bigEndian - ; shuffle mask for PSHUFB - movdqa xmm4, oword [rel encodeUTF16Big_shufmask] -%endif - -L8_at_a_time_%1: ; helperName - ; read 8 chars - ; should this use movdqu, start once 8-byte aligned? - movdqa xmm2, oword [_rsi] - - ; jump to residue loop if any are surrogate - movdqa xmm3, xmm2 - pand xmm3, xmm0 - pcmpeqw xmm3, xmm1 - ptest xmm3, xmm3 ; SSE4.1 - jnz Lresidue_%1 ; helperName - - ; no surrogates -%if %2 ;&bigEndian - pshufb xmm2, xmm4 ; SSSE3 -%endif - - ; write 8 chars - movdqu oword [_rsi + _rdi], xmm2 - - add _rsi, 16 - sub _rdx, 8 - jge L8_at_a_time_%1 ; helperName - -Lresidue_%1: ; helperName - add _rdx, 8 - cmp _rdx, 0 - je Lend_%1 ; helperName - -Lresidue_loop_%1: ; helperName - mov cx, word [_rsi] - - ; return if surrogate - mov bx, cx - and bx, SURROGATE_MASK - cmp bx, SURROGATE_BITS - je Lend_%1 ; helperName - - ; not surrogate -%if %2 ;&bigEndian - xchg cl, ch -%endif - mov word [_rsi + _rdi], cx - add _rsi, 2 - dec _rdx - jg Lresidue_loop_%1 ; helperName - -Lend_%1: ;&helperName: - sub _rax, _rdx - ret - -%endmacro - -; Expand out the two helpers - -DefineUTF16EncodeHelper encodeUTF16Big, 1 -DefineUTF16EncodeHelper encodeUTF16Little, 0 diff --git a/runtime/compiler/z/codegen/J9CodeGenerator.cpp b/runtime/compiler/z/codegen/J9CodeGenerator.cpp index 745bb8c31e7..75aea131fdc 100644 --- a/runtime/compiler/z/codegen/J9CodeGenerator.cpp +++ b/runtime/compiler/z/codegen/J9CodeGenerator.cpp @@ -4024,17 +4024,13 @@ J9::Z::CodeGenerator::inlineDirectCall( resultReg = TR::TreeEvaluator::inlineStringLatin1Inflate(node, cg); return resultReg != NULL; } - break; + break; case TR::java_lang_StringCoding_hasNegatives: if (cg->getSupportsInlineStringCodingHasNegatives()) { resultReg = TR::TreeEvaluator::inlineStringCodingHasNegatives(node, cg); return true; } - break; - case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big: - return resultReg = comp->getOption(TR_DisableUTF16BEEncoder) ? TR::TreeEvaluator::inlineUTF16BEEncodeSIMD(node, cg) - : TR::TreeEvaluator::inlineUTF16BEEncode (node, cg); break; case TR::java_lang_Integer_stringSize: case TR::java_lang_Long_stringSize: diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp index 43d7570f86c..207c72c4aac 100644 --- a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp @@ -1946,152 +1946,6 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator return indexRegister; } -TR::Register* -J9::Z::TreeEvaluator::inlineUTF16BEEncode(TR::Node *node, TR::CodeGenerator *cg) - { - TR::Compilation* comp = cg->comp(); - - // Create the necessary registers - TR::Register* output = cg->gprClobberEvaluate(node->getChild(1)); - TR::Register* input = cg->gprClobberEvaluate(node->getChild(0)); - - TR::Register* inputLen = cg->gprClobberEvaluate(node->getChild(2)); - TR::Register* inputLen8 = cg->allocateRegister(); - - TR::Register* temp1 = cg->allocateRegister(); - TR::Register* temp2 = cg->allocateRegister(); - - // Number of bytes currently translated (also used as a stride register) - TR::Register* translated = cg->allocateRegister(); - - // Convert input length in number of characters to number of bytes - generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, inputLen, inputLen, 1); - - // Calculate inputLen8 = inputLen / 8 - generateRSInstruction(cg, TR::InstOpCode::SRLK, node, inputLen8, inputLen, 3); - - // Initialize the number of translated bytes to 0 - generateRREInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, translated, translated); - - // Create the necessary labels - TR::LabelSymbol * processChar4 = generateLabelSymbol( cg); - TR::LabelSymbol * processChar4End = generateLabelSymbol( cg); - TR::LabelSymbol * processChar1 = generateLabelSymbol( cg); - TR::LabelSymbol * processChar1End = generateLabelSymbol( cg); - TR::LabelSymbol * processChar1Copy = generateLabelSymbol( cg); - - const uint16_t surrogateRange1 = 0xD800; - const uint16_t surrogateRange2 = 0xDFFF; - - const uint32_t surrogateMaskAND = 0xF800F800; - const uint32_t surrogateMaskXOR = 0xD800D800; - - TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 7, cg); - - // ----------------- Incoming branch ----------------- - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processChar4); - processChar4->setStartInternalControlFlow(); - - // Branch to the end if there are no more multiples of 4 chars left to process - generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, inputLen8, 0, TR::InstOpCode::COND_MASK8, processChar4End, false, false, NULL, dependencies); - - // Load 4 input characters from memory and make a copy - generateRXInstruction(cg, TR::InstOpCode::LG, node, temp1, generateS390MemoryReference(input, translated, 0, cg)); - generateRREInstruction(cg, TR::InstOpCode::LGR, node, temp2, temp1); - - // AND temp2 by the surrogate mask - generateRILInstruction(cg, TR::InstOpCode::NIHF, node, temp2, surrogateMaskAND); - generateRILInstruction(cg, TR::InstOpCode::NILF, node, temp2, surrogateMaskAND); - - // XOR temp2 by the surrogate mask and branch if CC = 1 (meaning there is a surrogate) - generateRILInstruction(cg, TR::InstOpCode::XIHF, node, temp2, surrogateMaskXOR); - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processChar4End); - generateRILInstruction(cg, TR::InstOpCode::XILF, node, temp2, surrogateMaskXOR); - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processChar4End); - - generateRXInstruction(cg, TR::InstOpCode::STG, node, temp1, generateS390MemoryReference(output, translated, 0, cg)); - - // Advance the number of bytes processed - generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, translated, 8); - - // Branch back to the start of the loop - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, processChar4); - - // ----------------- Incoming branch ----------------- - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processChar4End); - processChar4End->setEndInternalControlFlow(); - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processChar1); - processChar1->setStartInternalControlFlow(); - - // Branch to the end if there are no more characters left to process - generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, translated, inputLen, TR::InstOpCode::COND_BNL, processChar1End, false, false); - - // Load an input character from memory - generateRXInstruction(cg, TR::InstOpCode::LLH, node, temp1, generateS390MemoryReference(input, translated, 0, cg)); - - // Compare the input character against the lower bound surrogate character range - generateRILInstruction(cg, TR::InstOpCode::getCmpImmOpCode(), node, temp1, surrogateRange1); - - // Branch if < (non-surrogate char) - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, processChar1Copy); - - // Compare the input character against the upper bound surrogate character range - generateRILInstruction(cg, TR::InstOpCode::getCmpImmOpCode(), node, temp1, surrogateRange2); - - // Branch if > (non-surrogate char) - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK2, node, processChar1Copy); - - // If we get here it must be a surrogate char - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, processChar1End); - - // ----------------- Incoming branch ----------------- - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processChar1Copy); - - // Store the lower byte of the character into the output buffer - generateRXInstruction (cg, TR::InstOpCode::STH, node, temp1, generateS390MemoryReference(output, translated, 0, cg)); - - // Advance the number of bytes processed - generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, translated, 2); - - // Branch back to the start of the loop - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, processChar1); - - // Set up the proper register dependencies - dependencies->addPostCondition(input, TR::RealRegister::AssignAny); - dependencies->addPostCondition(inputLen, TR::RealRegister::AssignAny); - dependencies->addPostCondition(inputLen8, TR::RealRegister::AssignAny); - dependencies->addPostCondition(temp1, TR::RealRegister::AssignAny); - dependencies->addPostCondition(temp2, TR::RealRegister::AssignAny); - dependencies->addPostCondition(output, TR::RealRegister::AssignAny); - dependencies->addPostCondition(translated, TR::RealRegister::AssignAny); - - // ----------------- Incoming branch ----------------- - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processChar1End, dependencies); - processChar1End->setEndInternalControlFlow(); - - // Convert translated length in number of bytes to number of characters - generateRSInstruction(cg, TR::InstOpCode::getShiftRightLogicalSingleOpCode(), node, translated, translated, 1); - - // Cleanup nodes before returning - cg->decReferenceCount(node->getChild(0)); - cg->decReferenceCount(node->getChild(1)); - cg->decReferenceCount(node->getChild(2)); - - // Cleanup registers before returning - cg->stopUsingRegister(input); - cg->stopUsingRegister(inputLen); - cg->stopUsingRegister(inputLen8); - cg->stopUsingRegister(temp1); - cg->stopUsingRegister(temp2); - cg->stopUsingRegister(output); - - return node->setRegister(translated); - } - /** * \brief Generate inline assembly for CRC32C.updateBytes and CRC32C.updateDirectByteBuffer * \details @@ -2544,215 +2398,6 @@ J9::Z::TreeEvaluator::inlineCRC32CUpdateBytes(TR::Node *node, TR::CodeGenerator return crc; } -TR::Register* -J9::Z::TreeEvaluator::inlineUTF16BEEncodeSIMD(TR::Node *node, TR::CodeGenerator *cg) - { - TR::Compilation* comp = cg->comp(); - - // Create the necessary registers - TR::Register* output = cg->gprClobberEvaluate(node->getChild(1)); - TR::Register* input = cg->gprClobberEvaluate(node->getChild(0)); - - TR::Register* inputLen; - TR::Register* inputLen16 = cg->allocateRegister(); - TR::Register* inputLenMinus1 = inputLen16; - - // Number of characters currently translated - TR::Register* translated = cg->allocateRegister(); - - // Initialize the number of translated characters to 0 - generateRREInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, translated, translated); - - TR::Node* inputLenNode = node->getChild(2); - - // Optimize the constant length case - bool isLenConstant = inputLenNode->getOpCode().isLoadConst() && performTransformation(comp, "O^O [%p] Reduce input length to constant.\n", inputLenNode); - - if (isLenConstant) - { - inputLen = cg->allocateRegister(); - - // Convert input length in number of characters to number of bytes - generateLoad32BitConstant(cg, inputLenNode, ((getIntegralValue(inputLenNode) * 2)), inputLen, true); - generateLoad32BitConstant(cg, inputLenNode, ((getIntegralValue(inputLenNode) * 2) >> 4) << 4, inputLen16, true); - } - else - { - inputLen = cg->gprClobberEvaluate(inputLenNode, true); - - // Convert input length in number of characters to number of bytes - generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, inputLen, inputLen, 1); - - // Sign extend the value if needed - if (cg->comp()->target().is64Bit() && !(inputLenNode->getOpCode().isLong())) - { - generateRRInstruction(cg, TR::InstOpCode::getLoadRegWidenOpCode(), node, inputLen, inputLen); - generateRRInstruction(cg, TR::InstOpCode::getLoadRegWidenOpCode(), node, inputLen16, inputLen); - } - else - { - generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, inputLen16, inputLen); - } - - // Truncate the 4 right most bits - generateRIInstruction(cg, TR::InstOpCode::NILL, node, inputLen16, static_cast (0xFFF0)); - } - - // Create the necessary vector registers - TR::Register* vInput = cg->allocateRegister(TR_VRF); - TR::Register* vSurrogate = cg->allocateRegister(TR_VRF); // Track index of first surrogate char - - TR::Register* vRange = cg->allocateRegister(TR_VRF); - TR::Register* vRangeControl = cg->allocateRegister(TR_VRF); - - // Initialize the vector registers - uint16_t surrogateRange1 = 0xD800; - uint16_t surrogateRange2 = 0xDFFF; - - uint16_t surrogateControl1 = 0xA000; // >= comparison - uint16_t surrogateControl2 = 0xC000; // <= comparison - - generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vRange, 0, 0 /*unused*/); - generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vRangeControl, 0, 0 /*unused*/); - - generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, vRange, surrogateRange1, 0); - generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, vRange, surrogateRange2, 1); - - generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, vRangeControl, surrogateControl1, 0); - generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, vRangeControl, surrogateControl2, 1); - - // Create the necessary labels - TR::LabelSymbol * process8Chars = generateLabelSymbol(cg); - TR::LabelSymbol * process8CharsEnd = generateLabelSymbol(cg); - - TR::LabelSymbol * processUnder8Chars = generateLabelSymbol(cg); - TR::LabelSymbol * processUnder8CharsEnd = generateLabelSymbol(cg); - - TR::LabelSymbol * processSurrogate = generateLabelSymbol(cg); - TR::LabelSymbol * processSurrogateEnd = generateLabelSymbol(cg); - - // Branch to the end if there are no more multiples of 8 chars left to process - generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, inputLen16, 0, TR::InstOpCode::COND_MASK8, process8CharsEnd, false, false); - - // ----------------- Incoming branch ----------------- - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, process8Chars); - process8Chars->setStartInternalControlFlow(); - - // Load 16 bytes (8 chars) into vector register - generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(input, translated, 0, cg)); - - // Check for vector surrogates and branch to copy the non-surrogate bytes - generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, vSurrogate, vInput, vRange, vRangeControl, 0x1, 1); - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processSurrogate); - - // Store the result - generateVRXInstruction(cg, TR::InstOpCode::VST, node, vInput, generateS390MemoryReference(output, translated, 0, cg)); - - // Advance the stride register - generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, translated, 16); - - // Loop back if there is at least 8 chars left to process - generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, translated, inputLen16, TR::InstOpCode::COND_BL, process8Chars, false, false); - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, process8CharsEnd); - process8CharsEnd->setEndInternalControlFlow(); - - // ----------------- Incoming branch ----------------- - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processUnder8Chars); - processUnder8Chars->setStartInternalControlFlow(); - - // Calculate the number of residue bytes available - generateRRInstruction(cg, TR::InstOpCode::getSubstractRegOpCode(), node, inputLen, translated); - - // Branch to the end if there is no residue - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, processUnder8CharsEnd); - - // VLL and VSTL work on indices so we must subtract 1 - generateRIEInstruction(cg, TR::InstOpCode::getAddLogicalRegRegImmediateOpCode(), node, inputLenMinus1, inputLen, -1); - - // Zero out the input register to avoid invalid VSTRC result - generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vInput, 0, 0 /*unused*/); - - // VLL instruction can only handle memory references of type D(B), so increment the base input address - generateRRInstruction (cg, TR::InstOpCode::getAddRegOpCode(), node, input, translated); - - // Load residue bytes into vector register - generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, inputLenMinus1, generateS390MemoryReference(input, 0, cg)); - - // Check for vector surrogates and branch to copy the non-surrogate bytes - generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, vSurrogate, vInput, vRange, vRangeControl, 0x1, 1); - - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC3, node, processSurrogateEnd); - - // ----------------- Incoming branch ----------------- - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSurrogate); - - // Extract the index of the first surrogate char - generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, inputLen, vSurrogate, generateS390MemoryReference(7, cg), 0); - - // Return in the case of saturation at index 0 - generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, inputLen, 0, TR::InstOpCode::COND_CC0, processUnder8CharsEnd, false, false); - - // VLL and VSTL work on indices so we must subtract 1 - generateRIEInstruction(cg, TR::InstOpCode::getAddLogicalRegRegImmediateOpCode(), node, inputLenMinus1, inputLen, -1); - - // ----------------- Incoming branch ----------------- - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSurrogateEnd); - - // VSTL instruction can only handle memory references of type D(B), so increment the base output address - generateRRInstruction (cg, TR::InstOpCode::getAddRegOpCode(), node, output, translated); - - // Store the result - generateVRSbInstruction(cg, TR::InstOpCode::VSTL, node, vInput, inputLenMinus1, generateS390MemoryReference(output, 0, cg), 0); - - // Advance the stride register - generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, translated, inputLen); - - // Set up the proper register dependencies - TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 9, cg); - - dependencies->addPostCondition(input, TR::RealRegister::AssignAny); - dependencies->addPostCondition(inputLen, TR::RealRegister::AssignAny); - dependencies->addPostCondition(inputLen16, TR::RealRegister::AssignAny); - dependencies->addPostCondition(output, TR::RealRegister::AssignAny); - dependencies->addPostCondition(translated, TR::RealRegister::AssignAny); - - dependencies->addPostCondition(vInput, TR::RealRegister::AssignAny); - dependencies->addPostCondition(vSurrogate, TR::RealRegister::AssignAny); - dependencies->addPostCondition(vRange, TR::RealRegister::AssignAny); - dependencies->addPostCondition(vRangeControl, TR::RealRegister::AssignAny); - - // ----------------- Incoming branch ----------------- - - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processUnder8CharsEnd, dependencies); - processUnder8CharsEnd->setEndInternalControlFlow(); - - // Convert translated length in number of bytes to number of characters - generateRSInstruction(cg, TR::InstOpCode::getShiftRightLogicalSingleOpCode(), node, translated, translated, 1); - - // Cleanup nodes before returning - cg->decReferenceCount(node->getChild(0)); - cg->decReferenceCount(node->getChild(1)); - cg->decReferenceCount(node->getChild(2)); - - // Cleanup registers before returning - cg->stopUsingRegister(input); - cg->stopUsingRegister(inputLen); - cg->stopUsingRegister(inputLen16); - cg->stopUsingRegister(output); - - cg->stopUsingRegister(vInput); - cg->stopUsingRegister(vSurrogate); - cg->stopUsingRegister(vRange); - cg->stopUsingRegister(vRangeControl); - - return node->setRegister(translated); - } - static TR::Register* hashCodeHelper(TR::Node* node, TR::CodeGenerator* cg, TR::DataType elementType, TR::Node* nodeHash, bool isSigned) { diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp index 5d86a0ab155..e9b6d522a39 100644 --- a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp +++ b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp @@ -138,8 +138,6 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator */ static TR::Register *inlineStringHashCode(TR::Node *node, TR::CodeGenerator *cg, bool isCompressed); static TR::Register *inlineVectorizedHashCode(TR::Node* node, TR::CodeGenerator* cg); - static TR::Register *inlineUTF16BEEncodeSIMD(TR::Node *node, TR::CodeGenerator *cg); - static TR::Register* inlineUTF16BEEncode (TR::Node *node, TR::CodeGenerator *cg); static TR::Register *inlineCRC32CUpdateBytes(TR::Node *node, TR::CodeGenerator *cg, bool isDirectBuffer); static TR::Register *zdloadEvaluator(TR::Node *node, TR::CodeGenerator *cg);