[SYCL] Allow _Bitint of size greater than 128 bits when -fintelfpga is used (#6152)

premanandrao · web-flow · commit 51f2ea347316 · 2022-06-02T17:08:18.000+03:00
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
@@ -269,6 +269,7 @@ LANGOPT(GPUExcludeWrongSideOverloads, 1, 0, "always exclude wrong side overloads
 
 LANGOPT(SYCLIsDevice      , 1, 0, "Generate code for SYCL device")
 LANGOPT(SYCLIsHost        , 1, 0, "SYCL host compilation")
+LANGOPT(IntelFPGA         , 1, 0, "Perform ahead-of-time compilation for FPGA")
 LANGOPT(SYCLAllowFuncPtr  , 1, 0, "Allow function pointers in SYCL device code")
 LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters")
 LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
@@ -2754,7 +2754,8 @@ defm strict_vtable_pointers : BoolFOption<"strict-vtable-pointers",
   NegFlag<SetFalse>>;
 def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group<f_Group>;
 def fintelfpga : Flag<["-"], "fintelfpga">, Group<f_Group>,
-  Flags<[CC1Option, CoreOption]>, HelpText<"Perform ahead of time compilation for FPGA">;
+  Flags<[CC1Option, CoreOption]>, MarshallingInfoFlag<LangOpts<"IntelFPGA">>,
+  HelpText<"Perform ahead-of-time compilation for FPGA">;
 def fsycl_device_only : Flag<["-"], "fsycl-device-only">, Flags<[CoreOption]>,
   HelpText<"Compile SYCL kernels for device">;
 def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, Flags<[NoXarchOption, CC1Option, CoreOption]>,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4965,8 +4965,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
         HasFPGA = true;
         break;
       }
-    if (HasFPGA)
+    if (HasFPGA) {
       CmdArgs.push_back("-fsycl-disable-range-rounding");
+      // Pass -fintelfpga to both the host and device SYCL compilations if set.
+      CmdArgs.push_back("-fintelfpga");
+    }
 
     // Add any options that are needed specific to SYCL offload while
     // performing the host side compilation.
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
@@ -2292,7 +2292,7 @@ QualType Sema::BuildBitIntType(bool IsUnsigned, Expr *BitWidth,
   }
 
   const TargetInfo &TI = getASTContext().getTargetInfo();
-  if (NumBits > TI.getMaxBitIntWidth()) {
+  if (NumBits > TI.getMaxBitIntWidth() && !Context.getLangOpts().IntelFPGA) {
     Diag(Loc, diag::err_bit_int_max_size)
         << IsUnsigned << static_cast<uint64_t>(TI.getMaxBitIntWidth());
     return QualType();
diff --git a/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-host-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-host-intelfpga-bitint.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -no-opaque-pointers -fsycl-is-host -fintelfpga -triple x86_64 -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that we generate appropriate code for division
+// operations of _BitInts of size greater than 128 bits, since it
+// is allowed when -fintelfpga is enabled.
+
+// CHECK: define{{.*}} void @_Z3fooDB211_S_(i211* {{.*}} sret(i211) align 8 %agg.result, i211* {{.*}} byval(i211) align 8 %[[ARG1:[0-9]+]], i211* {{.*}} byval(i211) align 8 %[[ARG2:[0-9]+]])
+signed _BitInt(211) foo(signed _BitInt(211) a, signed _BitInt(211) b) {
+  // CHECK: %[[VAR_A:a]].addr = alloca i211, align 8
+  // CHECK: %[[VAR_B:b]].addr = alloca i211, align 8
+  // CHECK: %[[VAR_A]] = load i211, i211* %[[ARG1]], align 8
+  // CHECK: %[[VAR_B]] = load i211, i211* %[[ARG2]], align 8
+  // CHECK: store i211 %[[VAR_A]], i211* %[[VAR_A]].addr, align 8
+  // CHECK: store i211 %[[VAR_B]], i211* %[[VAR_B]].addr, align 8
+  // CHECK: %[[TEMP1:[0-9]+]] = load i211, i211* %[[VAR_A]].addr, align 8
+  // CHECK: %[[TEMP2:[0-9]+]] = load i211, i211* %[[VAR_B]].addr, align 8
+  // CHECK: %div = sdiv i211 %[[TEMP1]], %[[TEMP2]]
+  // CHECK: store i211 %div, i211* %agg.result, align 8
+  // CHECK: %[[RES:[0-9+]]] = load i211, i211* %agg.result, align 8
+  // CHECK: store i211 %[[RES]], i211* %agg.result, align 8
+  // CHECK: ret void
+  return a / b;
+}
diff --git a/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -no-opaque-pointers -fsycl-is-device -fintelfpga -triple spir64_fpga -IInputs -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that we generate appropriate code for division
+// operations of _BitInts of size greater than 128 bits, since it
+// is allowed when -fintelfpga is enabled.
+
+#include "Inputs/sycl.hpp"
+
+// CHECK: define{{.*}} void @_Z3fooDB211_S_(i211 addrspace(4)* {{.*}} sret(i211) align 8 %agg.result, i211* {{.*}} byval(i211) align 8 %[[ARG1:[0-9]+]], i211* {{.*}} byval(i211) align 8 %[[ARG2:[0-9]+]])
+signed _BitInt(211) foo(signed _BitInt(211) a, signed _BitInt(211) b) {
+  // CHECK: %[[VAR_A:a]] = load i211, i211* %[[ARG1]], align 8
+  // CHECK: %[[VAR_B:b]] = load i211, i211* %[[ARG2]], align 8
+  // CHECK: %[[RES:div]] = sdiv i211 %[[VAR_A]], %[[VAR_B]]
+  // CHECK: store i211 %[[RES]], i211 addrspace(4)* %agg.result, align 8
+  // CHECK: ret void
+  return a / b;
+}
+
+int main() {
+  sycl::handler h;
+  auto lambda = []() {
+    _BitInt(211) a, b = 3, c = 4;
+    a = foo(b, c);
+  };
+  h.single_task(lambda);
+}
diff --git a/clang/test/CodeGenSYCL/sycl-host-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/sycl-host-intelfpga-bitint.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -opaque-pointers -fsycl-is-host -fintelfpga -triple x86_64 -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that we generate appropriate code for division
+// operations of _BitInts of size greater than 128 bits, since it
+// is allowed when -fintelfpga is enabled.
+
+// CHECK: define{{.*}} void @_Z3fooDB211_S_(ptr {{.*}} sret(i211) align 8 %agg.result, ptr {{.*}} byval(i211) align 8 %[[ARG1:[0-9]+]], ptr {{.*}} byval(i211) align 8 %[[ARG2:[0-9]+]])
+signed _BitInt(211) foo(signed _BitInt(211) a, signed _BitInt(211) b) {
+  // CHECK: %[[VAR_A:a]].addr = alloca i211, align 8
+  // CHECK: %[[VAR_B:b]].addr = alloca i211, align 8
+  // CHECK: %[[VAR_A]] = load i211, ptr %[[ARG1]], align 8
+  // CHECK: %[[VAR_B]] = load i211, ptr %[[ARG2]], align 8
+  // CHECK: store i211 %[[VAR_A]], ptr %[[VAR_A]].addr, align 8
+  // CHECK: store i211 %[[VAR_B]], ptr %[[VAR_B]].addr, align 8
+  // CHECK: %[[TEMP1:[0-9]+]] = load i211, ptr %[[VAR_A]].addr, align 8
+  // CHECK: %[[TEMP2:[0-9]+]] = load i211, ptr %[[VAR_B]].addr, align 8
+  // CHECK: %div = sdiv i211 %[[TEMP1]], %[[TEMP2]]
+  // CHECK: store i211 %div, ptr %agg.result, align 8
+  // CHECK: %[[RES:[0-9+]]] = load i211, ptr %agg.result, align 8
+  // CHECK: store i211 %[[RES]], ptr %agg.result, align 8
+  // CHECK: ret void
+  return a / b;
+}
diff --git a/clang/test/CodeGenSYCL/sycl-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/sycl-intelfpga-bitint.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -opaque-pointers -fsycl-is-device -fintelfpga -triple spir64_fpga -IInputs -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that we generate appropriate code for division
+// operations of _BitInts of size greater than 128 bits, since it
+// is allowed when -fintelfpga is enabled.
+
+#include "Inputs/sycl.hpp"
+
+// CHECK: define{{.*}} void @_Z3fooDB211_S_(ptr addrspace(4) {{.*}} sret(i211) align 8 %agg.result, ptr {{.*}} byval(i211) align 8 %[[ARG1:[0-9]+]], ptr {{.*}} byval(i211) align 8 %[[ARG2:[0-9]+]])
+signed _BitInt(211) foo(signed _BitInt(211) a, signed _BitInt(211) b) {
+  // CHECK: %[[VAR_A:a]] = load i211, ptr %[[ARG1]], align 8
+  // CHECK: %[[VAR_B:b]] = load i211, ptr %[[ARG2]], align 8
+  // CHECK: %[[RES:div]] = sdiv i211 %[[VAR_A]], %[[VAR_B]]
+  // CHECK: store i211 %[[RES]], ptr addrspace(4) %agg.result, align 8
+  // CHECK: ret void
+  return a / b;
+}
+
+int main() {
+  sycl::handler h;
+  auto lambda = []() {
+    _BitInt(211) a, b = 3, c = 4;
+    a = foo(b, c);
+  };
+  h.single_task(lambda);
+}
diff --git a/clang/test/Driver/sycl-offload-intelfpga.cpp b/clang/test/Driver/sycl-offload-intelfpga.cpp
@@ -20,6 +20,12 @@
 // RUN:   | FileCheck -check-prefix=CHK-TOOLS-INTELFPGA-G0 %s
 // CHK-TOOLS-INTELFPGA-G0-NOT: clang{{.*}} "-debug-info-kind=constructor"
 
+/// -fintelfpga passes it to host and device cc1 compilations
+// RUN:   %clangxx -### -fsycl -fintelfpga %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-HOST-DEVICE %s
+// CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fsycl-is-device"{{.*}} "-fintelfpga"
+// CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fintelfpga"{{.*}} "-fsycl-is-host"
+
 /// FPGA target implies -fsycl-disable-range-rounding
 // RUN:   %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fintelfpga %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
diff --git a/clang/test/SemaSYCL/sycl-intelfpga.cpp b/clang/test/SemaSYCL/sycl-intelfpga.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -fsycl-is-device -fintelfpga -verify=device-intelfpga -fsyntax-only %s
+// RUN: %clang_cc1 -fsycl-is-host -fintelfpga -verify=host-intelfpga -fsyntax-only %s
+// RUN: %clang_cc1 -fsycl-is-device -verify=device -fsyntax-only %s
+// RUN: %clang_cc1 -fsycl-is-host -verify=host -fsyntax-only %s
+
+// Tests that we do not issue errors for _Bitints of size greater than 128
+// when -fintelfpga is enabled.  The backend is expected to be able to handle
+// this.  When -fintelfpga is not passed, we continue to diagnose.
+
+// device-intelfpga-no-diagnostics
+// host-intelfpga-no-diagnostics
+// device-error@+2 3{{signed _BitInt of bit sizes greater than 128 not supported}}
+// host-error@+1 3{{signed _BitInt of bit sizes greater than 128 not supported}}
+signed _BitInt(211) foo(signed _BitInt(211) a, signed _BitInt(211) b) {
+  return a / b;
+}

Original file line number	Diff line number	Diff line change
`@@ -2292,7 +2292,7 @@ QualType Sema::BuildBitIntType(bool IsUnsigned, Expr *BitWidth,`
`2292`	`2292`	`}`
`2293`	`2293`
`2294`	`2294`	`const TargetInfo &TI = getASTContext().getTargetInfo();`
`2295`		`- if (NumBits > TI.getMaxBitIntWidth()) {`
	`2295`	`+ if (NumBits > TI.getMaxBitIntWidth() && !Context.getLangOpts().IntelFPGA) {`
`2296`	`2296`	`Diag(Loc, diag::err_bit_int_max_size)`
`2297`	`2297`	`<< IsUnsigned << static_cast<uint64_t>(TI.getMaxBitIntWidth());`
`2298`	`2298`	`return QualType();`