Skip to content

8355644: Optimize Math.cbrt for AArch64 platforms with an intrinsic implementation #25085

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3192,6 +3192,20 @@ template<typename R, typename... Rx>
f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
}

// Advanced SIMD copy: INS (element).
// Emits an instruction that inserts element `index2` of Vs into element
// `index1` of Vd. T selects the element size (B, H, S or D); Q is rejected.
//
// Both indices are encoded scaled by the element size:
//   imm5 (bits 20..16) = (index1 << (T + 1)) | (1 << T)
//   imm4 (bits 14..11) =  index2 << T
void ins(FloatRegister Vd, int index1, SIMD_RegVariant T, FloatRegister Vs, int index2) {
  starti;
  assert(T != Q, "invalid size");
  f(0b01101110000, 31, 21);
  // imm5: the lowest set bit marks the element size, the bits above it
  // carry the destination element index.
  f((1 << T) | (index1 << (T + 1)), 20, 16);
  // imm4: the source element index occupies the upper bits of the field, so
  // it has to be shifted by the element size too; an unshifted index is only
  // correct for byte elements.
  f(0, 15), f(index2 << T, 14, 11);
  f(1, 10), rf(Vs, 5), rf(Vd, 0);
}

// Advanced SIMD scalar copy
void dup(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int index = 0)
{
Expand Down
8 changes: 7 additions & 1 deletion src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
x->id() == vmIntrinsics::_dlog10) {
x->id() == vmIntrinsics::_dlog10 || x->id() == vmIntrinsics::_dcbrt) {
do_LibmIntrinsic(x);
return;
}
Expand Down Expand Up @@ -867,6 +867,12 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dcbrt:
assert(StubRoutines::dcbrt() != nullptr, "cbrt intrinsic not found");
if (StubRoutines::dcbrt() != nullptr) {
__ call_runtime_leaf(StubRoutines::dcbrt(), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
__ move(result_reg, calc_result);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,
assert(dst != rscratch2, "need rscratch2");
assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);

__ block_comment("load_reference_barrier {");

bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
Expand Down Expand Up @@ -306,6 +308,8 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,

__ bind(heap_stable);
__ leave();

__ block_comment("} load_reference_barrier");
}

//
Expand Down
14 changes: 14 additions & 0 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ class MacroAssembler: public Assembler {
void membar(Membar_mask_bits order_constraint);

using Assembler::ldr;
using Assembler::ldrd;
using Assembler::ldrq;
using Assembler::str;
using Assembler::ldrw;
using Assembler::strw;
Expand All @@ -176,6 +178,15 @@ class MacroAssembler: public Assembler {
void str(Register Rx, const Address &adr);
void strw(Register Rx, const Address &adr);

void ldrd(FloatRegister f, Address adr, Register scratch) {
lea(scratch, adr);
ldrd(f, Address(scratch, 0));
}
void ldrq(FloatRegister f, Address adr, Register scratch) {
lea(scratch, adr);
ldrq(f, Address(scratch, 0));
}

// Frame creation and destruction shared between JITs.
void build_frame(int framesize);
void remove_frame(int framesize);
Expand Down Expand Up @@ -1531,13 +1542,16 @@ class MacroAssembler: public Assembler {

void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi,
address pio2, address dsin_coef, address dcos_coef);
void generate_libmCbrt();

private:
// begin trigonometric functions support block
void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2);
void generate__kernel_rem_pio2(address two_over_pi, address pio2);
void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef);
void generate_kernel_cos(FloatRegister x, address dcos_coef);
// end trigonometric functions support block

void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
Register src1, Register src2);
void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
Expand Down
Loading