Skip to content

Commit

Permalink
[GR-60402] Add vzeroupper upon the entrance of AMD64 sha1 and sha256 …
Browse files Browse the repository at this point in the history
…stubs.

PullRequest: graal/19671
  • Loading branch information
mur47x111 committed Dec 20, 2024
1 parent 0fa84cb commit 7eb97e5
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 40 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -24,8 +24,16 @@
*/
package jdk.graal.compiler.lir.amd64;

import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;
import static jdk.vm.ci.amd64.AMD64.xmm0;
import static jdk.vm.ci.amd64.AMD64.xmm1;
import static jdk.vm.ci.amd64.AMD64.xmm10;
import static jdk.vm.ci.amd64.AMD64.xmm11;
import static jdk.vm.ci.amd64.AMD64.xmm12;
import static jdk.vm.ci.amd64.AMD64.xmm13;
import static jdk.vm.ci.amd64.AMD64.xmm14;
import static jdk.vm.ci.amd64.AMD64.xmm15;
import static jdk.vm.ci.amd64.AMD64.xmm2;
import static jdk.vm.ci.amd64.AMD64.xmm3;
import static jdk.vm.ci.amd64.AMD64.xmm4;
Expand All @@ -35,20 +43,18 @@
import static jdk.vm.ci.amd64.AMD64.xmm8;
import static jdk.vm.ci.amd64.AMD64.xmm9;
import static jdk.vm.ci.code.ValueUtil.asRegister;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;

import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.core.amd64.AMD64LIRGenerator;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;

import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.AllocatableValue;
Expand Down Expand Up @@ -76,11 +82,11 @@ public final class AMD64SHA1Op extends AMD64LIRInstruction {
@Temp({OperandFlag.REG}) private Value[] temps;
private final boolean multiBlock;

public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) {
public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) {
this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false);
}

public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) {
super(TYPE);

Expand All @@ -92,18 +98,40 @@ public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatable

this.multiBlock = multiBlock;

this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
};
if (tool.supportsCPUFeature(CPUFeature.AVX)) {
// vzeroupper clears upper bits of xmm0-xmm15
this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
xmm10.asValue(),
xmm11.asValue(),
xmm12.asValue(),
xmm13.asValue(),
xmm14.asValue(),
xmm15.asValue(),
};
} else {
this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
};
}

if (multiBlock) {
this.bufTempValue = tool.newVariable(bufValue.getValueKind());
Expand Down Expand Up @@ -168,6 +196,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
Label labelDoneHash = new Label();
Label labelLoop0 = new Label();

if (masm.supports(CPUFeature.AVX)) {
// Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
// previously executed AVX instructions and the following SHA-1 instructions.
masm.vzeroupper();
}

masm.movdqu(abcd, new AMD64Address(state, 0));
masm.pinsrd(e0, new AMD64Address(state, 16), 3);
masm.movdqu(shufMask, recordExternalAddress(crb, upperWordMask));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -24,9 +24,17 @@
*/
package jdk.graal.compiler.lir.amd64;

import static jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag.BelowEqual;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;
import static jdk.vm.ci.amd64.AMD64.xmm0;
import static jdk.vm.ci.amd64.AMD64.xmm1;
import static jdk.vm.ci.amd64.AMD64.xmm10;
import static jdk.vm.ci.amd64.AMD64.xmm11;
import static jdk.vm.ci.amd64.AMD64.xmm12;
import static jdk.vm.ci.amd64.AMD64.xmm13;
import static jdk.vm.ci.amd64.AMD64.xmm14;
import static jdk.vm.ci.amd64.AMD64.xmm15;
import static jdk.vm.ci.amd64.AMD64.xmm2;
import static jdk.vm.ci.amd64.AMD64.xmm3;
import static jdk.vm.ci.amd64.AMD64.xmm4;
Expand All @@ -36,20 +44,17 @@
import static jdk.vm.ci.amd64.AMD64.xmm8;
import static jdk.vm.ci.amd64.AMD64.xmm9;
import static jdk.vm.ci.code.ValueUtil.asRegister;
import static jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag.BelowEqual;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;

import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.core.amd64.AMD64LIRGenerator;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;

import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.AllocatableValue;
Expand Down Expand Up @@ -79,11 +84,11 @@ public final class AMD64SHA256Op extends AMD64LIRInstruction {

private final boolean multiBlock;

public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) {
public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) {
this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false);
}

public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) {
super(TYPE);

Expand All @@ -97,19 +102,40 @@ public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatab

this.keyTempValue = tool.newVariable(bufValue.getValueKind());

this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
xmm10.asValue(),
};
if (tool.supportsCPUFeature(CPUFeature.AVX)) {
// vzeroupper clears upper bits of xmm0-xmm15
this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
xmm10.asValue(),
xmm11.asValue(),
xmm12.asValue(),
xmm13.asValue(),
xmm14.asValue(),
xmm15.asValue(),
};
} else {
this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
};
}

if (multiBlock) {
this.bufTempValue = tool.newVariable(bufValue.getValueKind());
Expand Down Expand Up @@ -199,6 +225,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
// keyTemp replaces the hardcoded rax in the original stub.
Register keyTemp = asRegister(keyTempValue);

if (masm.supports(CPUFeature.AVX)) {
// Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
// previously executed AVX instructions and the following SHA-256 instructions.
masm.vzeroupper();
}

masm.movdqu(state0, new AMD64Address(state, 0));
masm.movdqu(state1, new AMD64Address(state, 16));

Expand Down

0 comments on commit 7eb97e5

Please sign in to comment.