diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 118d7b75b320e1..a1a4c657031cce 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -8581,6 +8581,143 @@ void Lowering::LowerShift(GenTreeOp* shift)
         shift->gtOp2->ClearContained();
     }
 
+    if (comp->opts.OptimizationEnabled() && shift->OperIs(GT_LSH, GT_RSH, GT_RSZ) && shift->gtGetOp2()->IsCnsIntOrI())
+    {
+        GenTree* op1      = shift->gtGetOp1();
+        ssize_t  c2       = shift->gtGetOp2()->AsIntCon()->IconValue();
+        unsigned bitWidth = genTypeSize(shift->TypeGet()) * 8;
+
+        // Case 1: (shift (shift x c1) c2)
+        // The shift counts can only be combined when both nodes use the same operator
+        // (LSH/LSH, RSH/RSH or RSZ/RSZ).
+        bool sameOp = op1->OperIs(shift->OperGet());
+
+        if (sameOp && op1->gtGetOp2()->IsCnsIntOrI() && !op1->IsMultiRegNode())
+        {
+            ssize_t c1 = op1->gtGetOp2()->AsIntCon()->IconValue();
+
+            // Only optimize if the types match; this keeps the width checks simple.
+            if (op1->TypeGet() == shift->TypeGet())
+            {
+                ssize_t combined = c1 + c2;
+
+                // Both counts must individually be in range: a count of zero adds nothing, and counts at or
+                // above the bit width have target-dependent masking behavior that we do not reason about here.
+                if ((c1 > 0) && (c2 > 0) && (c1 < (ssize_t)bitWidth) && (c2 < (ssize_t)bitWidth))
+                {
+                    if (combined < (ssize_t)bitWidth)
+                    {
+                        JITDUMP("Optimizing consecutive shifts: (x %s %d) %s %d -> x %s %d\n",
+                                GenTree::OpName(op1->OperGet()), (int)c1, GenTree::OpName(shift->OperGet()),
+                                (int)c2, GenTree::OpName(shift->OperGet()), (int)combined);
+
+                        shift->gtGetOp2()->AsIntCon()->SetIconValue(combined);
+                        shift->gtOp1 = op1->gtGetOp1();
+                        op1->gtGetOp1()->ClearContained();
+                        BlockRange().Remove(op1->gtGetOp2());
+                        BlockRange().Remove(op1);
+                    }
+                    else
+                    {
+                        // Overshift case: the combined count is at least the bit width.
+                        JITDUMP("Optimizing overshift: (x %s %d) %s %d\n", GenTree::OpName(op1->OperGet()),
+                                (int)c1, GenTree::OpName(shift->OperGet()), (int)c2);
+
+                        if (shift->OperIs(GT_RSH))
+                        {
+                            // RSH saturates to the sign bit, i.e. a shift by (bitWidth - 1).
+                            // For 32-bit: (x >> 30) >> 30 -> x >> 31.
+                            JITDUMP(" -> x >> %d\n", bitWidth - 1);
+
+                            shift->gtGetOp2()->AsIntCon()->SetIconValue(static_cast<ssize_t>(bitWidth) - 1);
+                            shift->gtOp1 = op1->gtGetOp1();
+                            op1->gtGetOp1()->ClearContained();
+                            BlockRange().Remove(op1->gtGetOp2());
+                            BlockRange().Remove(op1);
+                        }
+                        else
+                        {
+                            // LSH or RSZ overshift produces zero:
+                            // (x << 30) << 2   -> 0
+                            // (x >>> 30) >>> 2 -> 0
+                            JITDUMP(" -> 0\n");
+
+                            GenTree* zero = comp->gtNewZeroConNode(shift->TypeGet());
+                            BlockRange().InsertAfter(shift, zero);
+
+                            LIR::Use use;
+                            if (BlockRange().TryGetUse(shift, &use))
+                            {
+                                use.ReplaceWith(zero);
+                            }
+                            else
+                            {
+                                zero->SetUnusedValue();
+                            }
+
+                            // Remove the outer shift and its constant count. The inner shift no longer has a
+                            // user (in LIR each value has a single use), so mark its value unused and let
+                            // liveness/DCE clean it up; its source operand may carry side effects to preserve.
+                            BlockRange().Remove(shift->gtGetOp2());
+                            BlockRange().Remove(shift);
+                            op1->SetUnusedValue();
+                        }
+                    }
+                }
+            }
+        }
+        // Case 2: (shift (cast (shift x c1)) c2)
+        // Optimization for: RSZ(CAST(RSZ(x, c1)), c2) -> CAST(RSZ(x, c1 + c2))
+        else if (shift->OperIs(GT_RSZ) && op1->OperIs(GT_CAST) && !op1->gtOverflow() && !op1->IsMultiRegNode())
+        {
+            GenTree* cast       = op1;
+            GenTree* innerShift = cast->gtGetOp1();
+
+            // Only optimize widening (or same-width) casts. A narrowing cast discards high bits between the
+            // two shifts, so folding the counts would let bits that the cast should have dropped reach the
+            // final result. For example, (long)(x >>> 16) >>> 8 folds safely to (long)(x >>> 24), but with a
+            // cast to short in between the two forms can differ. Note that the cast-to type is what matters
+            // here, not the node type (casts to small types still produce TYP_INT nodes).
+            bool isNarrowing = genTypeSize(cast->AsCast()->CastToType()) < genTypeSize(innerShift->TypeGet());
+
+            if (!isNarrowing && innerShift->OperIs(GT_RSZ) && innerShift->gtGetOp2()->IsCnsIntOrI() &&
+                !innerShift->IsMultiRegNode())
+            {
+                ssize_t  c1            = innerShift->gtGetOp2()->AsIntCon()->IconValue();
+                unsigned innerBitWidth = genTypeSize(innerShift->TypeGet()) * 8;
+
+                if ((c1 > 0) && (c2 > 0) && ((c1 + c2) < (ssize_t)innerBitWidth))
+                {
+                    JITDUMP("Optimizing distinct type shifts: (cast (x >> %d)) >> %d -> cast (x >> %d)\n", (int)c1,
+                            (int)c2, (int)(c1 + c2));
+
+                    innerShift->gtGetOp2()->AsIntCon()->SetIconValue(c1 + c2);
+
+                    // Replace uses of 'shift' with 'cast', bypassing 'shift'. If 'shift' had no use, the
+                    // cast's value becomes unused instead.
+                    LIR::Use use;
+                    if (BlockRange().TryGetUse(shift, &use))
+                    {
+                        use.ReplaceWith(cast);
+                    }
+                    else
+                    {
+                        cast->SetUnusedValue();
+                    }
+
+                    // Remove the outer shift and its now-unused count.
+                    BlockRange().Remove(shift->gtGetOp2());
+                    BlockRange().Remove(shift);
+                }
+            }
+        }
+    }
+
     ContainCheckShiftRotate(shift);
 
 #ifdef TARGET_ARM64
diff --git a/src/installer/pkg/sfx/Directory.Build.props b/src/installer/pkg/sfx/Directory.Build.props
index 04c7177930cd61..ef62cf7c25d50f 100644
--- a/src/installer/pkg/sfx/Directory.Build.props
+++ b/src/installer/pkg/sfx/Directory.Build.props
@@ -14,8 +14,8 @@
     true
-    true
-    true
+    true
+    true
     true
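
Illustration (not part of the diff): a minimal standalone C++ sketch of the identities the Case 1 peephole relies on, assuming 32-bit operands and arithmetic right shift for signed values; the rsz/rsh helpers are invented here purely for the example. In-range counts fold by addition, logical/left overshifts collapse to zero, and arithmetic right overshifts saturate to a shift by bitWidth - 1.

#include <cassert>
#include <cstdint>

// Logical (unsigned) right shift; all counts below stay under 32, mirroring the in-range guards.
static uint32_t rsz(uint32_t x, unsigned c) { return x >> c; }
// Arithmetic (signed) right shift; assumes sign-propagating behavior (guaranteed since C++20).
static int32_t rsh(int32_t x, unsigned c) { return x >> c; }

int main()
{
    uint32_t u = 0xDEADBEEFu;
    int32_t  s = -12345678;

    // Same-operator shifts with an in-range combined count fold by adding the counts.
    assert(rsz(rsz(u, 3), 5) == rsz(u, 8));
    assert(((u << 3) << 5) == (u << 8));

    // Logical-right and left overshifts (combined count >= 32) produce zero.
    assert(rsz(rsz(u, 30), 2) == 0);
    assert(((u << 30) << 2) == 0);

    // Arithmetic-right overshift saturates to the sign bit: equivalent to a single shift by 31.
    assert(rsh(rsh(s, 30), 30) == rsh(s, 31));
    return 0;
}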