[CIR] Recognize constant aggregate initialization of auto vars #166850
Conversation
This adds code that was previously missing from emitAutoVarAlloca to identify when an aggregate auto var is being emitted with a constant initializer. This allows the initialization to be optimized; without this change, default initialization can be missed in some cases.
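As a small illustration (not part of the patch), this is the kind of local variable the change recognizes; the CIR in the comments is a rough sketch of the single-store form the updated tests below check for:

// Example only; mirrors func3 in clang/test/CIR/CodeGen/array.cpp.
// With this change, the initializer is recognized as a constant aggregate and
// emitted as one constant store instead of per-element stores, roughly:
//   %arr   = cir.alloca !cir.array<!s32i x 2>, ... ["arr", init]
//   %const = cir.const #cir.const_array<[#cir.int<5> : !s32i, #cir.int<6> : !s32i]> : !cir.array<!s32i x 2>
//   cir.store %const, %arr
void example(void) {
  int arr[2] = {5, 6}; // aggregate auto var with a constant initializer
}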
@llvm/pr-subscribers-clangir

Author: Andy Kaylor (andykaylor)

Changes

This adds code that was previously missing from emitAutoVarAlloca to identify when an aggregate auto var is being emitted with a constant initializer, and the associated code that is called from emitAutoVarInit to store the constant. This allows significantly more efficient initialization. (A reproduction sketch follows the patch excerpt below.)

Patch is 529.23 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/166850.diff

26 Files Affected:
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 48ef8be9fb782..60b47303e866a 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -187,6 +187,7 @@ struct MissingFeatures {
// Misc
static bool abiArgInfo() { return false; }
+ static bool addAutoInitAnnotation() { return false; }
static bool addHeapAllocSiteMetadata() { return false; }
static bool aggEmitFinalDestCopyRValue() { return false; }
static bool aggValueSlot() { return false; }
@@ -196,6 +197,7 @@ struct MissingFeatures {
static bool aggValueSlotMayOverlap() { return false; }
static bool aggValueSlotVolatile() { return false; }
static bool alignCXXRecordDecl() { return false; }
+ static bool appleKext() { return false; }
static bool armComputeVolatileBitfields() { return false; }
static bool asmGoto() { return false; }
static bool asmInputOperands() { return false; }
@@ -241,6 +243,7 @@ struct MissingFeatures {
static bool deleteArray() { return false; }
static bool devirtualizeDestructor() { return false; }
static bool devirtualizeMemberFunction() { return false; }
+ static bool dtorCleanups() { return false; }
static bool ehCleanupFlags() { return false; }
static bool ehCleanupHasPrebranchedFallthrough() { return false; }
static bool ehCleanupScope() { return false; }
@@ -286,6 +289,7 @@ struct MissingFeatures {
static bool objCGC() { return false; }
static bool objCLifetime() { return false; }
static bool hlsl() { return false; }
+ static bool msvcBuiltins() { return false; }
static bool openCL() { return false; }
static bool openMP() { return false; }
static bool opTBAA() { return false; }
@@ -300,6 +304,10 @@ struct MissingFeatures {
static bool setNonGC() { return false; }
static bool setObjCGCLValueClass() { return false; }
static bool setTargetAttributes() { return false; }
+ static bool shouldCreateMemCpyFromGlobal() { return false; }
+ static bool shouldSplitConstantStore() { return false; }
+ static bool shouldUseBZeroPlusStoresToInitialize() { return false; }
+ static bool shouldUseMemSetToInitialize() { return false; }
static bool simplifyCleanupEntry() { return false; }
static bool sourceLanguageCases() { return false; }
static bool stackBase() { return false; }
@@ -311,16 +319,14 @@ struct MissingFeatures {
static bool thunks() { return false; }
static bool tryEmitAsConstant() { return false; }
static bool typeChecks() { return false; }
- static bool weakRefReference() { return false; }
- static bool writebacks() { return false; }
- static bool appleKext() { return false; }
- static bool dtorCleanups() { return false; }
+ static bool vaArgABILowering() { return false; }
+ static bool vectorConstants() { return false; }
+ static bool vlas() { return false; }
static bool vtableInitialization() { return false; }
static bool vtableEmitMetadata() { return false; }
static bool vtableRelativeLayout() { return false; }
- static bool msvcBuiltins() { return false; }
- static bool vaArgABILowering() { return false; }
- static bool vlas() { return false; }
+ static bool weakRefReference() { return false; }
+ static bool writebacks() { return false; }
// Missing types
static bool dataMemberType() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index aeea0efeb77c3..325875d10d6ea 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -50,6 +50,41 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d,
Address address = Address::invalid();
if (ty->isConstantSizeType()) {
+ // If this value is an array, struct, or vector with a statically
+ // determinable constant initializer, there are optimizations we can do.
+ //
+ // TODO: We should constant-evaluate the initializer of any variable,
+ // as long as it is initialized by a constant expression. Currently,
+ // isConstantInitializer produces wrong answers for structs with
+ // reference or bitfield members, and a few other cases, and checking
+ // for POD-ness protects us from some of these.
+ if (d.getInit() &&
+ (ty->isArrayType() || ty->isRecordType() || ty->isVectorType()) &&
+ (d.isConstexpr() ||
+ ((ty.isPODType(getContext()) ||
+ getContext().getBaseElementType(ty)->isObjCObjectPointerType()) &&
+ d.getInit()->isConstantInitializer(getContext(), false)))) {
+
+ // If the variable's a const type, and it's neither an NRVO
+ // candidate nor a __block variable and has no mutable members,
+ // emit it as a global instead.
+ // Exception is if a variable is located in non-constant address space
+ // in OpenCL.
+ // TODO(cir): perhaps we don't need this at all at CIR since this can
+ // be done as part of lowering down to LLVM.
+ bool needsDtor =
+ d.needsDestruction(getContext()) == QualType::DK_cxx_destructor;
+ if ((!getContext().getLangOpts().OpenCL ||
+ ty.getAddressSpace() == LangAS::opencl_constant) &&
+ (cgm.getCodeGenOpts().MergeAllConstants && !nrvo &&
+ !d.isEscapingByref() &&
+ ty.isConstantStorage(getContext(), true, !needsDtor))) {
+ cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: type constant");
+ }
+ // Otherwise, tell the initialization code that we're in this case.
+ emission.isConstantAggregate = true;
+ }
+
// A normal fixed sized variable becomes an alloca in the entry block,
// unless:
// - it's an NRVO variable.
@@ -131,6 +166,47 @@ bool CIRGenFunction::isTrivialInitializer(const Expr *init) {
return false;
}
+static void emitStoresForConstant(CIRGenModule &cgm, const VarDecl &d,
+ Address addr, bool isVolatile,
+ CIRGenBuilderTy &builder,
+ mlir::TypedAttr constant) {
+ mlir::Type ty = constant.getType();
+ cir::CIRDataLayout layout{cgm.getModule()};
+ uint64_t constantSize = layout.getTypeAllocSize(ty);
+ if (!constantSize)
+ return;
+ assert(!cir::MissingFeatures::addAutoInitAnnotation());
+ assert(!cir::MissingFeatures::vectorConstants());
+ assert(!cir::MissingFeatures::shouldUseBZeroPlusStoresToInitialize());
+ assert(!cir::MissingFeatures::shouldUseMemSetToInitialize());
+ assert(!cir::MissingFeatures::shouldSplitConstantStore());
+ assert(!cir::MissingFeatures::shouldCreateMemCpyFromGlobal());
+ // In CIR we want to emit a store for the whole thing, later lowering
+ // prepare to LLVM should unwrap this into the best policy (see asserts
+ // above).
+ //
+ // FIXME(cir): This is closer to memcpy behavior but less optimal, instead of
+ // copy from a global, we just create a cir.const out of it.
+
+ if (addr.getElementType() != ty)
+ addr = addr.withElementType(builder, ty);
+
+ // If the address is an alloca, set the init attribute.
+ // The address is usually an alloca, but there is at least one case where
+ // emitAutoVarInit is called from the OpenACC codegen with an address that
+ // is not an alloca.
+ auto allocaOp = addr.getDefiningOp<cir::AllocaOp>();
+ if (allocaOp)
+ allocaOp.setInitAttr(mlir::UnitAttr::get(&cgm.getMLIRContext()));
+
+ // There are cases where OpenACC codegen calls emitAutoVarInit with a
+ // temporary decl that doesn't have a source range set.
+ mlir::Location loc = builder.getUnknownLoc();
+ if (d.getSourceRange().isValid())
+ loc = cgm.getLoc(d.getSourceRange());
+ builder.createStore(loc, builder.getConstant(loc, constant), addr);
+}
+
void CIRGenFunction::emitAutoVarInit(
const CIRGenFunction::AutoVarEmission &emission) {
assert(emission.variable && "emission was not valid!");
@@ -237,6 +313,9 @@ void CIRGenFunction::emitAutoVarInit(
return emitStoreThroughLValue(
RValue::get(builder.getConstant(initLoc, typedConstant)), lv);
}
+
+ emitStoresForConstant(cgm, d, addr, type.isVolatileQualified(), builder,
+ typedConstant);
}
void CIRGenFunction::emitAutoVarCleanups(
diff --git a/clang/test/CIR/CodeGen/agg-expr-lvalue.c b/clang/test/CIR/CodeGen/agg-expr-lvalue.c
index c826f8fa829d0..509f0218e9912 100644
--- a/clang/test/CIR/CodeGen/agg-expr-lvalue.c
+++ b/clang/test/CIR/CodeGen/agg-expr-lvalue.c
@@ -95,16 +95,13 @@ void test_string_array_in_array(void) {
}
// CIR-LABEL: cir.func{{.*}} @test_string_array_in_array
-// CIR: cir.alloca !cir.array<!cir.array<!s8i x 6> x 2>, {{.*}}, ["matrix", init]
-// CIR: cir.get_global
-// CIR: cir.copy
-// CIR: cir.get_global
-// CIR: cir.copy
+// CIR: %[[MATRIX:.*]] = cir.alloca !cir.array<!cir.array<!s8i x 6> x 2>, {{.*}}, ["matrix", init]
+// CIR: %[[CONST:.*]] = cir.const #cir.const_array<[#cir.const_array<[#cir.int<104> : !s8i, #cir.int<101> : !s8i, #cir.int<108> : !s8i, #cir.int<108> : !s8i, #cir.int<111> : !s8i, #cir.int<0> : !s8i]> : !cir.array<!s8i x 6>, #cir.const_array<[#cir.int<119> : !s8i, #cir.int<111> : !s8i, #cir.int<114> : !s8i, #cir.int<108> : !s8i, #cir.int<100> : !s8i, #cir.int<0> : !s8i]> : !cir.array<!s8i x 6>]>
+// CIR: cir.store{{.*}} %[[CONST]], %[[MATRIX]]
// LLVM-LABEL: define{{.*}} @test_string_array_in_array
-// LLVM: alloca [2 x [6 x i8]]
-// LLVM: call void @llvm.memcpy
-// LLVM: call void @llvm.memcpy
+// LLVM: %[[MATRIX:.*]] = alloca [2 x [6 x i8]]
+// LLVM: store [2 x [6 x i8]] {{\[}}[6 x i8] c"hello\00", [6 x i8] c"world\00"], ptr %[[MATRIX]]
// OGCG-LABEL: define{{.*}} @test_string_array_in_array
// OGCG: alloca [2 x [6 x i8]]
diff --git a/clang/test/CIR/CodeGen/array.cpp b/clang/test/CIR/CodeGen/array.cpp
index 82add4b347e72..5e873810d494b 100644
--- a/clang/test/CIR/CodeGen/array.cpp
+++ b/clang/test/CIR/CodeGen/array.cpp
@@ -151,50 +151,12 @@ void func2() {
}
// CIR: %[[ARR2:.*]] = cir.alloca !cir.array<!s32i x 2>, !cir.ptr<!cir.array<!s32i x 2>>, ["arr", init]
-// CIR: %[[ARR_PTR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp", init]
-// CIR: %[[ARR_0:.*]] = cir.cast array_to_ptrdecay %[[ARR2]] : !cir.ptr<!cir.array<!s32i x 2>> -> !cir.ptr<!s32i>
-// CIR: %[[FIVE:.*]] = cir.const #cir.int<5> : !s32i
-// CIR: cir.store{{.*}} %[[FIVE]], %[[ARR_0]] : !s32i, !cir.ptr<!s32i>
-// CIR: %[[OFFSET_0:.*]] = cir.const #cir.int<1> : !s64i
-// CIR: %[[ELE_PTR:.*]] = cir.ptr_stride %[[ARR_0]], %[[OFFSET_0]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
-// CIR: cir.store{{.*}} %[[ELE_PTR]], %[[ARR_PTR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
-// CIR: %[[TWO:.*]] = cir.const #cir.int<2> : !s64i
-// CIR: %[[ARR_END:.*]] = cir.ptr_stride %[[ARR_0]], %[[TWO]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
-// CIR: cir.do {
-// CIR: %[[ARR_CUR:.*]] = cir.load{{.*}} %[[ARR_PTR]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
-// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
-// CIR: cir.store{{.*}} %[[ZERO]], %[[ARR_CUR]] : !s32i, !cir.ptr<!s32i>
-// CIR: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i
-// CIR: %[[ARR_NEXT:.*]] = cir.ptr_stride %[[ARR_CUR]], %[[ONE]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
-// CIR: cir.store{{.*}} %[[ARR_NEXT]], %[[ARR_PTR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
-// CIR: cir.yield
-// CIR: } while {
-// CIR: %[[ARR_CUR:.*]] = cir.load{{.*}} %[[ARR_PTR]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
-// CIR: %[[CMP:.*]] = cir.cmp(ne, %[[ARR_CUR]], %[[ARR_END]]) : !cir.ptr<!s32i>, !cir.bool
-// CIR: cir.condition(%[[CMP]])
-// CIR: }
+// CIR: %[[CONST:.*]] = cir.const #cir.const_array<[#cir.int<5> : !s32i, #cir.int<0> : !s32i]> : !cir.array<!s32i x 2>
+// CIR: cir.store{{.*}} %[[CONST]], %[[ARR2]] : !cir.array<!s32i x 2>, !cir.ptr<!cir.array<!s32i x 2>>
// LLVM: define{{.*}} void @_Z5func2v(){{.*}}
// LLVM: %[[ARR:.*]] = alloca [2 x i32], i64 1, align 4
-// LLVM: %[[TMP:.*]] = alloca ptr, i64 1, align 8
-// LLVM: %[[ARR_PTR:.*]] = getelementptr i32, ptr %[[ARR]], i32 0
-// LLVM: store i32 5, ptr %[[ARR_PTR]], align 4
-// LLVM: %[[ELE_1_PTR:.*]] = getelementptr i32, ptr %[[ARR_PTR]], i64 1
-// LLVM: store ptr %[[ELE_1_PTR]], ptr %[[TMP]], align 8
-// LLVM: %[[END_PTR:.*]] = getelementptr i32, ptr %[[ARR_PTR]], i64 2
-// LLVM: br label %[[LOOP_BODY:.*]]
-// LLVM: [[LOOP_NEXT:.*]]:
-// LLVM: %[[CUR:.*]] = load ptr, ptr %[[TMP]], align 8
-// LLVM: %[[CMP:.*]] = icmp ne ptr %[[CUR]], %[[END_PTR]]
-// LLVM: br i1 %[[CMP]], label %[[LOOP_BODY]], label %[[LOOP_END:.*]]
-// LLVM: [[LOOP_BODY]]:
-// LLVM: %[[CUR:.*]] = load ptr, ptr %[[TMP]], align 8
-// LLVM: store i32 0, ptr %[[CUR]], align 4
-// LLVM: %[[NEXT:.*]] = getelementptr i32, ptr %[[CUR]], i64 1
-// LLVM: store ptr %[[NEXT]], ptr %[[TMP]], align 8
-// LLVM: br label %[[LOOP_NEXT:.*]]
-// LLVM: [[LOOP_END]]:
-// LLVM: ret void
+// LLVM: store [2 x i32] [i32 5, i32 0], ptr %[[ARR]], align 4
// OGCG: %[[ARR:.*]] = alloca [2 x i32], align 4
// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %[[ARR]], ptr align 4 @[[FUN2_ARR]], i64 8, i1 false)
@@ -209,13 +171,8 @@ void func3() {
// CIR: %[[ARR:.*]] = cir.alloca !cir.array<!s32i x 2>, !cir.ptr<!cir.array<!s32i x 2>>, ["arr", init]
// CIR: %[[IDX:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["idx", init]
// CIR: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
-// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!s32i x 2>> -> !cir.ptr<!s32i>
-// CIR: %[[V0:.*]] = cir.const #cir.int<5> : !s32i
-// CIR: cir.store{{.*}} %[[V0]], %[[ARR_PTR]] : !s32i, !cir.ptr<!s32i>
-// CIR: %[[OFFSET_0:.*]] = cir.const #cir.int<1> : !s64i
-// CIR: %[[ELE_1_PTR:.*]] = cir.ptr_stride %[[ARR_PTR]], %[[OFFSET_0]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
-// CIR: %[[V1:.*]] = cir.const #cir.int<6> : !s32i
-// CIR: cir.store{{.*}} %[[V1]], %[[ELE_1_PTR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[CONST:.*]] = cir.const #cir.const_array<[#cir.int<5> : !s32i, #cir.int<6> : !s32i]> : !cir.array<!s32i x 2>
+// CIR: cir.store{{.*}} %[[CONST]], %[[ARR]] : !cir.array<!s32i x 2>, !cir.ptr<!cir.array<!s32i x 2>>
// CIR: %[[IDX_V:.*]] = cir.const #cir.int<1> : !s32i
// CIR: cir.store{{.*}} %[[IDX_V]], %[[IDX]] : !s32i, !cir.ptr<!s32i>
// CIR: %[[TMP_IDX:.*]] = cir.load{{.*}} %[[IDX]] : !cir.ptr<!s32i>, !s32i
@@ -228,10 +185,7 @@ void func3() {
// LLVM: %[[ARR:.*]] = alloca [2 x i32], i64 1, align 4
// LLVM: %[[IDX:.*]] = alloca i32, i64 1, align 4
// LLVM: %[[INIT:.*]] = alloca i32, i64 1, align 4
-// LLVM: %[[ARR_PTR:.*]] = getelementptr i32, ptr %[[ARR]], i32 0
-// LLVM: store i32 5, ptr %[[ARR_PTR]], align 4
-// LLVM: %[[ELE_1_PTR:.*]] = getelementptr i32, ptr %[[ARR_PTR]], i64 1
-// LLVM: store i32 6, ptr %[[ELE_1_PTR]], align 4
+// LLVM: store [2 x i32] [i32 5, i32 6], ptr %[[ARR]], align 4
// LLVM: store i32 1, ptr %[[IDX]], align 4
// LLVM: %[[TMP1:.*]] = load i32, ptr %[[IDX]], align 4
// LLVM: %[[ARR_PTR:.*]] = getelementptr i32, ptr %[[ARR]], i32 0
@@ -258,15 +212,8 @@ void func4() {
// CIR: %[[ARR:.*]] = cir.alloca !cir.array<!cir.array<!s32i x 1> x 2>, !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>, ["arr", init]
// CIR: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
-// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>> -> !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[ARR_0_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>> -> !cir.ptr<!s32i>
-// CIR: %[[V_0_0:.*]] = cir.const #cir.int<5> : !s32i
-// CIR: cir.store{{.*}} %[[V_0_0]], %[[ARR_0_PTR]] : !s32i, !cir.ptr<!s32i>
-// CIR: %[[OFFSET:.*]] = cir.const #cir.int<1> : !s64i
-// CIR: %[[ARR_1:.*]] = cir.ptr_stride %[[ARR_PTR]], %[[OFFSET]] : (!cir.ptr<!cir.array<!s32i x 1>>, !s64i) -> !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[ARR_1_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR_1]] : !cir.ptr<!cir.array<!s32i x 1>> -> !cir.ptr<!s32i>
-// CIR: %[[V_1_0:.*]] = cir.const #cir.int<6> : !s32i
-// CIR: cir.store{{.*}} %[[V_1_0]], %[[ARR_1_PTR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[CONST:.*]] = cir.const #cir.const_array<[#cir.const_array<[#cir.int<5> : !s32i]> : !cir.array<!s32i x 1>, #cir.const_array<[#cir.int<6> : !s32i]> : !cir.array<!s32i x 1>]> : !cir.array<!cir.array<!s32i x 1> x 2>
+// CIR: cir.store{{.*}} %[[CONST]], %[[ARR]] : !cir.array<!cir.array<!s32i x 1> x 2>, !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>
// CIR: %[[IDX:.*]] = cir.const #cir.int<0> : !s32i
// CIR: %[[IDX_1:.*]] = cir.const #cir.int<1> : !s32i
// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>> -> !cir.ptr<!cir.array<!s32i x 1>>
@@ -279,12 +226,7 @@ void func4() {
// LLVM: define{{.*}} void @_Z5func4v(){{.*}}
// LLVM: %[[ARR:.*]] = alloca [2 x [1 x i32]], i64 1, align 4
// LLVM: %[[INIT:.*]] = alloca i32, i64 1, align 4
-// LLVM: %[[ARR_PTR:.*]] = getelementptr [1 x i32], ptr %[[ARR]], i32 0
-// LLVM: %[[ARR_0_0:.*]] = getelementptr i32, ptr %[[ARR_PTR]], i32 0
-// LLVM: store i32 5, ptr %[[ARR_0_0]], align 4
-// LLVM: %[[ARR_1:.*]] = getelementptr [1 x i32], ptr %[[ARR_PTR]], i64 1
-// LLVM: %[[ARR_1_0:.*]] = getelementptr i32, ptr %[[ARR_1]], i32 0
-// LLVM: store i32 6, ptr %[[ARR_1_0]], align 4
+// LLVM: store [2 x [1 x i32]] {{\[}}[1 x i32] [i32 5], [1 x i32] [i32 6]], ptr %[[ARR]], align 4
// LLVM: %[[ARR_PTR:.*]] = getelementptr [1 x i32], ptr %[[ARR]], i32 0
// LLVM: %[[ARR_1:.*]] = getelementptr [1 x i32], ptr %[[ARR_PTR]], i64 1
// LLVM: %[[ARR_1_0:.*]] = getelementptr i32, ptr %[[ARR_1]], i32 0
@@ -305,52 +247,12 @@ void func5() {
}
// CIR: %[[ARR:.*]] = cir.alloca !cir.array<!cir.array<!s32i x 1> x 2>, !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>, ["arr", init]
-// CIR: %[[ARR_PTR:.*]] = cir.alloca !cir.ptr<!cir.array<!s32i x 1>>, !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>, ["arrayinit.temp", init]
-// CIR: %[[ARR_0:.*]] = cir.cast array_to_ptrdecay %0 : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>> -> !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[ARR_0_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR_0]] : !cir.ptr<!cir.array<!s32i x 1>> -> !cir.ptr<!s32i>
-// CIR: %[[V_0_0:.*]] = cir.const #cir.int<5> : !s32i
-// CIR: cir.store{{.*}} %[[V_0_0]], %[[ARR_0_PTR]] : !s32i, !cir.ptr<!s32i>
-// CIR: %[[OFFSET:.*]] = cir.const #cir.int<1> : !s64i
-// CIR: %[[ARR_1:.*]] = cir.ptr_stride %[[ARR_0]], %[[OFFSET]] : (!cir.ptr<!cir.array<!s32i x 1>>, !s64i) -> !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: cir.store{{.*}} %[[ARR_1]], %[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>>, !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>
-// CIR: %[[TWO:.*]] = cir.const #cir.int<2> : !s64i
-// CIR: %[[ARR_END:.*]] = cir.ptr_stride %[[ARR_0]], %[[TWO]] : (!cir.ptr<!cir.array<!s32i x 1>>, !s64i) -> !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: cir.do {
-// CIR: %[[ARR_CUR:.*]] = cir.load{{.*}} %[[ARR_PTR]] : !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>, !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.array<!s32i x 1>
-// CIR: cir.store{{.*}} %[[ZERO]], %[[ARR_CUR]] : !cir.array<!s32i x 1>, !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i
-// CIR: %[[ARR_NEXT:.*]] = cir.ptr_stride %[[ARR_CUR]], %[[ONE]] : (!cir.ptr<!cir.array<!s32i x 1>>, !s64i) -> !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: cir.store{{.*}} %[[ARR_NEXT]], %[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>>, !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>
-// CIR: cir.yield
-// CIR: } while {
-// CIR: %[[ARR_CUR:.*]] = cir.load{{.*}} %[[ARR_PTR]] : !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>, !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[CMP:.*]] = cir.cmp(ne, %[[ARR_CUR]], %[[ARR_END]]) : !cir.ptr<!cir.array<!s32i x 1>>, !cir.bool
-// CIR: cir.condition(%[[CMP]])
-// CIR: }
+// CIR: %[[CONST:.*]] = cir.const #cir.const_array<[#cir.const_array<[#cir.int<5> : !s32i]> : !cir.array<!s32i x 1>, #cir.zero : !cir.array<!s32i x 1>]> : !cir.array<!cir.array<!s32i x 1> x 2>
+// CIR: cir.store{{.*}} %[[CONST]], %[[ARR]] : !cir.array<!cir.array<!s32i x 1> x 2>, !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>
// LLVM: define{{.*}} void @_Z5func5v(){{.*}}
// LLVM: %[[ARR:.*]] = alloca [2 x [1 x i32]], i64 1, align 4
-// LLVM: %[[TMP:.*]] = alloca ptr, i64 1, align 8
-// LLVM: ...
[truncated]
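For local inspection, a minimal reproduction sketch (assumptions: a hypothetical file name repro.c and the usual CIR lit setup; the RUN lines mirror the pattern used in the tests this patch updates):

// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s
// With the patch applied, the array initializer should show up as a single
// constant store instead of element-by-element initialization.
// CHECK: cir.const #cir.const_array
// CHECK: cir.store
void repro(void) {
  int arr[2] = {5, 0};
}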
-// CIR: %[[ARR_NEXT:.*]] = cir.ptr_stride %[[ARR_CUR]], %[[ONE]] : (!cir.ptr<!cir.array<!s32i x 1>>, !s64i) -> !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: cir.store{{.*}} %[[ARR_NEXT]], %[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>>, !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>
-// CIR: cir.yield
-// CIR: } while {
-// CIR: %[[ARR_CUR:.*]] = cir.load{{.*}} %[[ARR_PTR]] : !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>, !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[CMP:.*]] = cir.cmp(ne, %[[ARR_CUR]], %[[ARR_END]]) : !cir.ptr<!cir.array<!s32i x 1>>, !cir.bool
-// CIR: cir.condition(%[[CMP]])
-// CIR: }
+// CIR: %[[CONST:.*]] = cir.const #cir.const_array<[#cir.const_array<[#cir.int<5> : !s32i]> : !cir.array<!s32i x 1>, #cir.zero : !cir.array<!s32i x 1>]> : !cir.array<!cir.array<!s32i x 1> x 2>
+// CIR: cir.store{{.*}} %[[CONST]], %[[ARR]] : !cir.array<!cir.array<!s32i x 1> x 2>, !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>
// LLVM: define{{.*}} void @_Z5func5v(){{.*}}
// LLVM: %[[ARR:.*]] = alloca [2 x [1 x i32]], i64 1, align 4
-// LLVM: %[[TMP:.*]] = alloca ptr, i64 1, align 8
-// LLVM: ...
[truncated]
LGTM, Thanks!
// CIR: cir.store align(16) %[[ARR_INIT]], %[[ARR_ADDR]]
// CIR: cir.store align(8) %[[ARR_ADDR]], %[[RANGE_ADDR]]
Suggested change:
-// CIR: cir.store align(16) %[[ARR_INIT]], %[[ARR_ADDR]]
-// CIR: cir.store align(8) %[[ARR_ADDR]], %[[RANGE_ADDR]]
+// CIR: cir.store{{.*}} %[[ARR_INIT]], %[[ARR_ADDR]]
+// CIR: cir.store{{.*}} %[[ARR_ADDR]], %[[RANGE_ADDR]]

// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s32i
// CIR: cir.store{{.*}} %[[CONST_0]], %[[Y_ELEM_PTR]] : !s32i, !cir.ptr<!s32i>
// CIR: %[[CONST:.*]] = cir.const #cir.zero : !rec_Point
// CIR: cir.store align(4) %[[CONST]], %[[A_ADDR]] : !rec_Point, !cir.ptr<!rec_Point>
Suggested change:
-// CIR: cir.store align(4) %[[CONST]], %[[A_ADDR]] : !rec_Point, !cir.ptr<!rec_Point>
+// CIR: cir.store{{.*}} %[[CONST]], %[[A_ADDR]] : !rec_Point, !cir.ptr<!rec_Point>

// CIR: %[[ZERO:.*]] = cir.const #cir.zero : !rec_CompleteS
// CIR: cir.store{{.*}} %[[ZERO]], %[[A_ADDR]] : !rec_CompleteS, !cir.ptr<!rec_CompleteS>
// CIR: %[[CONST:.*]] = cir.const #cir.zero : !rec_CompleteS
// CIR: cir.store align(4) %[[CONST]], %[[A_ADDR]] : !rec_CompleteS, !cir.ptr<!rec_CompleteS>
Suggested change:
-// CIR: cir.store align(4) %[[CONST]], %[[A_ADDR]] : !rec_CompleteS, !cir.ptr<!rec_CompleteS>
+// CIR: cir.store{{.*}} %[[CONST]], %[[A_ADDR]] : !rec_CompleteS, !cir.ptr<!rec_CompleteS>

// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s8i
// CIR: cir.store{{.*}} %[[CONST_0]], %[[A_ELEM_1_PTR]] : !s8i, !cir.ptr<!s8i>
// CIR: %[[CONST:.*]] = cir.const #cir.zero : !rec_CompleteS
// CIR: cir.store align(4) %[[CONST]], %[[A_ADDR]] : !rec_CompleteS, !cir.ptr<!rec_CompleteS>
Suggested change:
-// CIR: cir.store align(4) %[[CONST]], %[[A_ADDR]] : !rec_CompleteS, !cir.ptr<!rec_CompleteS>
+// CIR: cir.store{{.*}} %[[CONST]], %[[A_ADDR]] : !rec_CompleteS, !cir.ptr<!rec_CompleteS>

This adds code that was previously missing from emitAutoVarAlloca to identify when an aggregate auto var is being emitted with a constant initializer, and the associated code that is called from emitAutoVarInit to store the constant. This allows significantly more efficient initialization.
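As a quick illustration of the effect (a minimal sketch, not part of the patch: the function name example, the helper use, and the initializer values are hypothetical, while the CIR and LLVM patterns mirror the updated tests above), a local array with a fully constant initializer is now emitted as a single #cir.const_array constant plus one cir.store, rather than an array_to_ptrdecay cast followed by per-element stores:

// Hypothetical source; names and values are illustrative only.
void use(int *p);

void example() {
  int arr[2] = {5, 6}; // constant aggregate initializer for an auto variable
  use(arr);
}

// Expected shape of the output after this change (patterns taken from the
// updated tests in the truncated diff above):
// CIR:  %[[ARR:.*]] = cir.alloca !cir.array<!s32i x 2>, !cir.ptr<!cir.array<!s32i x 2>>, ["arr", init]
// CIR:  %[[CONST:.*]] = cir.const #cir.const_array<[#cir.int<5> : !s32i, #cir.int<6> : !s32i]> : !cir.array<!s32i x 2>
// CIR:  cir.store{{.*}} %[[CONST]], %[[ARR]] : !cir.array<!s32i x 2>, !cir.ptr<!cir.array<!s32i x 2>>
// LLVM: store [2 x i32] [i32 5, i32 6], ptr %{{.*}}, align 4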