Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/sycl' into sycl-devops-pr/udit…
Browse files Browse the repository at this point in the history
…/pvc_e2e
  • Loading branch information
uditagarwal97 committed Sep 16, 2024
2 parents cec74ff + 27fff01 commit 53c68cd
Show file tree
Hide file tree
Showing 13 changed files with 50 additions and 64 deletions.
2 changes: 1 addition & 1 deletion libclc/cmake/modules/AddLibclc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ macro(add_libclc_builtin_set arch_suffix)
IN_FILE ${builtins_link_lib}
OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR}
OPT_FLAGS ${ARG_OPT_FLAGS}
DEPENDENCIES ${builtins_link_lib_tgt})
DEPENDENCIES ${builtins_link_lib_tgt} ${LIBCLC_LIBRARY_OUTPUT_INTDIR})

# Add dependency to top-level pseudo target to ease making other
# targets dependent on libclc.
Expand Down
14 changes: 7 additions & 7 deletions sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 2bbe952669861579ea84fa30f14e1ed27ead0692
# Merge: d357964a 6b353545
# Author: Omar Ahmed <omar.ahmed@codeplay.com>
# Date: Thu Sep 12 11:36:11 2024 +0100
# Merge pull request #1928 from isaacault/iault/image_array_copy
# [Bindless][Exp] Image Array Sub-Region Copies
set(UNIFIED_RUNTIME_TAG 2bbe952669861579ea84fa30f14e1ed27ead0692)
# commit f31160dea6d142014f441bc4ca5e58e48827490e
# Merge: 2bbe9526 64068799
# Author: Piotr Balcer <piotr.balcer@intel.com>
# Date: Thu Sep 12 14:19:48 2024 +0200
# Merge pull request #2083 from kswiecicki/xpti-init-fix
# Fix XPTI initialization bug
set(UNIFIED_RUNTIME_TAG f31160dea6d142014f441bc4ca5e58e48827490e)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
Expand Down
20 changes: 10 additions & 10 deletions sycl/include/std/experimental/simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1042,39 +1042,39 @@ inline constexpr overaligned_tag<_Np> overaligned{};

// traits [simd.traits]
template <class _Tp>
struct is_abi_tag : std::integral_constant<bool, false> {};
struct is_abi_tag : std::false_type {};

template <_StorageKind __kind, int _Np>
struct is_abi_tag<__simd_abi<__kind, _Np>>
: std::integral_constant<bool, true> {};
: std::true_type {};

template <class _Tp>
struct is_simd : std::integral_constant<bool, false> {};
struct is_simd : std::false_type {};

template <class _Tp, class _Abi>
struct is_simd<simd<_Tp, _Abi>> : std::integral_constant<bool, true> {};
struct is_simd<simd<_Tp, _Abi>> : std::true_type {};

template <class _Tp>
struct is_simd_mask : std::integral_constant<bool, false> {};
struct is_simd_mask : std::false_type {};

template <class _Tp, class _Abi>
struct is_simd_mask<simd_mask<_Tp, _Abi>> : std::integral_constant<bool, true> {
struct is_simd_mask<simd_mask<_Tp, _Abi>> : std::true_type {
};

template <class _Tp>
struct is_simd_flag_type : std::integral_constant<bool, false> {};
struct is_simd_flag_type : std::false_type {};

template <>
struct is_simd_flag_type<element_aligned_tag>
: std::integral_constant<bool, true> {};
: std::true_type {};

template <>
struct is_simd_flag_type<vector_aligned_tag>
: std::integral_constant<bool, true> {};
: std::true_type {};

template <size_t _Align>
struct is_simd_flag_type<overaligned_tag<_Align>>
: std::integral_constant<bool, true> {};
: std::true_type {};

template <class _Tp>
inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
Expand Down
12 changes: 6 additions & 6 deletions sycl/include/sycl/detail/helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,15 @@ class Builder {
#ifdef __SYCL_DEVICE_ONLY__

template <int N>
using is_valid_dimensions = std::integral_constant<bool, (N > 0) && (N < 4)>;
static inline constexpr bool is_valid_dimensions = (N > 0) && (N < 4);

template <int Dims> static const id<Dims> getElement(id<Dims> *) {
static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
static_assert(is_valid_dimensions<Dims>, "invalid dimensions");
return __spirv::initGlobalInvocationId<Dims, id<Dims>>();
}

template <int Dims> static const group<Dims> getElement(group<Dims> *) {
static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
static_assert(is_valid_dimensions<Dims>, "invalid dimensions");
range<Dims> GlobalSize{__spirv::initGlobalSize<Dims, range<Dims>>()};
range<Dims> LocalSize{__spirv::initWorkgroupSize<Dims, range<Dims>>()};
range<Dims> GroupRange{__spirv::initNumWorkgroups<Dims, range<Dims>>()};
Expand All @@ -145,7 +145,7 @@ class Builder {

template <int Dims, bool WithOffset>
static std::enable_if_t<WithOffset, const item<Dims, WithOffset>> getItem() {
static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
static_assert(is_valid_dimensions<Dims>, "invalid dimensions");
id<Dims> GlobalId{__spirv::initGlobalInvocationId<Dims, id<Dims>>()};
range<Dims> GlobalSize{__spirv::initGlobalSize<Dims, range<Dims>>()};
id<Dims> GlobalOffset{__spirv::initGlobalOffset<Dims, id<Dims>>()};
Expand All @@ -154,14 +154,14 @@ class Builder {

template <int Dims, bool WithOffset>
static std::enable_if_t<!WithOffset, const item<Dims, WithOffset>> getItem() {
static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
static_assert(is_valid_dimensions<Dims>, "invalid dimensions");
id<Dims> GlobalId{__spirv::initGlobalInvocationId<Dims, id<Dims>>()};
range<Dims> GlobalSize{__spirv::initGlobalSize<Dims, range<Dims>>()};
return createItem<Dims, false>(GlobalSize, GlobalId);
}

template <int Dims> static const nd_item<Dims> getElement(nd_item<Dims> *) {
static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
static_assert(is_valid_dimensions<Dims>, "invalid dimensions");
range<Dims> GlobalSize{__spirv::initGlobalSize<Dims, range<Dims>>()};
range<Dims> LocalSize{__spirv::initWorkgroupSize<Dims, range<Dims>>()};
range<Dims> GroupRange{__spirv::initNumWorkgroups<Dims, range<Dims>>()};
Expand Down
3 changes: 1 addition & 2 deletions sycl/include/sycl/detail/type_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,7 @@ template <> struct is_sub_group<sycl::sub_group> : std::true_type {};

template <typename T>
struct is_generic_group
: std::integral_constant<bool,
is_group<T>::value || is_sub_group<T>::value> {};
: std::bool_constant<is_group<T>::value || is_sub_group<T>::value> {};
template <typename T>
inline constexpr bool is_generic_group_v = is_generic_group<T>::value;

Expand Down
2 changes: 1 addition & 1 deletion sycl/include/sycl/ext/oneapi/bfloat16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ class bfloat16 {
explicit operator bool() { return to_float(value) != 0.0f; }

// Unary minus operator overloading
friend bfloat16 operator-(bfloat16 &lhs) {
friend bfloat16 operator-(const bfloat16 &lhs) {
#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) && \
(__SYCL_CUDA_ARCH__ >= 800)
detail::Bfloat16StorageT res;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,15 @@ class complex<_Tp, typename std::enable_if_t<is_genfloat<_Tp>::value>>;
////////////////////////////////////////////////////////////////////////////////

template <class _Tp>
struct is_genfloat
: std::integral_constant<bool, std::is_same_v<_Tp, double> ||
std::is_same_v<_Tp, float> ||
std::is_same_v<_Tp, sycl::half>> {};
struct is_genfloat : std::bool_constant<std::is_same_v<_Tp, double> ||
std::is_same_v<_Tp, float> ||
std::is_same_v<_Tp, sycl::half>> {};

template <class _Tp>
struct is_gencomplex
: std::integral_constant<bool,
std::is_same_v<_Tp, complex<double>> ||
std::is_same_v<_Tp, complex<float>> ||
std::is_same_v<_Tp, complex<sycl::half>>> {};
: std::bool_constant<std::is_same_v<_Tp, complex<double>> ||
std::is_same_v<_Tp, complex<float>> ||
std::is_same_v<_Tp, complex<sycl::half>>> {};

////////////////////////////////////////////////////////////////////////////////
/// DEFINES
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ namespace experimental {

namespace cplx::detail {

template <bool _Val> using _BoolConstant = std::integral_constant<bool, _Val>;

template <class _Tp, class _Up>
using _IsNotSame = _BoolConstant<!__is_same(_Tp, _Up)>;

template <class _Tp> struct __numeric_type {
static void __test(...);
static sycl::half __test(sycl::half);
Expand All @@ -46,7 +41,7 @@ template <class _Tp> struct __numeric_type {
static double __test(double);

typedef decltype(__test(std::declval<_Tp>())) type;
static const bool value = _IsNotSame<type, void>::value;
static const bool value = !std::is_same_v<type, void>;
};

template <> struct __numeric_type<void> {
Expand Down
5 changes: 2 additions & 3 deletions sycl/include/sycl/ext/oneapi/experimental/group_sort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ struct is_sorter_impl {
std::declval<G>(), std::declval<Val>()))>;

template <typename G = Group>
static decltype(std::integral_constant<bool,
is_expected_return_type<G>::value &&
sycl::is_group_v<G>>{})
static decltype(std::bool_constant<is_expected_return_type<G>::value &&
sycl::is_group_v<G>>{})
test(int);

template <typename = Group> static std::false_type test(...);
Expand Down
4 changes: 2 additions & 2 deletions sycl/include/sycl/group_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ struct is_complex : public std::false_type {};
// ---- is_arithmetic_or_complex
template <typename T>
using is_arithmetic_or_complex =
std::integral_constant<bool, sycl::detail::is_complex<T>::value ||
sycl::detail::is_arithmetic<T>::value>;
std::bool_constant<sycl::detail::is_complex<T>::value ||
sycl::detail::is_arithmetic<T>::value>;

template <typename T>
struct is_vector_arithmetic_or_complex
Expand Down
14 changes: 6 additions & 8 deletions sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) {

const size_t maxWorkGroupSize =
kernel.template get_info<work_group_size>(dev);
const size_t NumWorkItems = maxWorkGroupSize * maxWorkGroupSize;
// Will try to launch 2 work groups.
const size_t NumWorkItems = maxWorkGroupSize * 2;

size_t workGroupSize = 32;
size_t localMemorySizeInBytes{0};
Expand Down Expand Up @@ -100,8 +101,6 @@ int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) {
// ===================== //
// We must have at least one active group if we are below resource limits.
assert(maxWGs > 0 && "max_num_work_groups query failed");
if (maxWGs == 0)
return 1;

// Run the kernel
auto launch_range = sycl::nd_range<1>{sycl::range<1>{NumWorkItems},
Expand All @@ -121,7 +120,6 @@ int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) {
// ========================== //
// Test 3 - use max resources //
// ========================== //
// A little over the maximum work-group size for the purpose of exceeding.
workGroupSize = maxWorkGroupSize;
workGroupRange[0] = workGroupSize;
size_t localSize =
Expand All @@ -133,9 +131,8 @@ int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) {
syclex::info::kernel_queue_specific::max_num_work_groups>(
q, workGroupRange, localMemorySizeInBytes);

// We must have at least one active group if we are at resource limits.
assert(maxWGs > 0 && "max_num_work_groups query failed");
if (maxWGs == 0)
return 1;

launch_range = sycl::nd_range<1>{sycl::range<1>{NumWorkItems},
sycl::range<1>{workGroupSize}};
Expand All @@ -155,6 +152,7 @@ int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) {
// =============================== //
// Test 4 - exceed resource limits //
// =============================== //
// A little over the maximum work-group size for the purpose of exceeding.
workGroupSize = maxWorkGroupSize + 32;
workGroupRange[0] = workGroupSize;
maxWGs = kernel.template ext_oneapi_get_info<
Expand All @@ -163,10 +161,10 @@ int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) {
// It cannot be possible to launch a kernel successfully with a configuration
// that exceeds the available resources as in the above defined workGroupSize.
// workGroupSize is larger than maxWorkGroupSize, hence maxWGs must equal 0.
// Note: Level-Zero currently always returns a non-zero value. While other
// backends (i.e., OpenCL, HIP) always return 1 in their implementations.
if (dev.get_backend() == sycl::backend::ext_oneapi_cuda) {
assert(maxWGs == 0 && "max_num_work_groups query failed");
if (maxWGs > 0)
return 1;
}

// As we ensured that the 'max_num_work_groups' query correctly
Expand Down
5 changes: 1 addition & 4 deletions sycl/test-e2e/OneapiDeviceSelector/no_duplicate_devices.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
// REQUIRES: opencl, cpu
// RUN: %{build} -o %t.out
// RUN: env ONEAPI_DEVICE_SELECTOR="opencl:*" %{run-unfiltered-devices} %t.out 1 &> tmp.txt
// RUN: env ONEAPI_DEVICE_SELECTOR="opencl:*" %{run-unfiltered-devices} %t.out 1 > tmp.txt
// RUN: cat tmp.txt | env ONEAPI_DEVICE_SELECTOR="opencl:*,cpu" %{run-unfiltered-devices} %t.out
// RUN: cat tmp.txt | env ONEAPI_DEVICE_SELECTOR="opencl:cpu,cpu" %{run-unfiltered-devices} %t.out

// https://github.com/intel/llvm/issues/15288
// XFAIL: linux && gpu-intel-gen12

// on the first run we pass a dummy arg to the app. On seeing that, we count the
// number of CPU devices and output it. That is piped to a file. On subsequent
// runs we cat the file and pipe that to app. The app then compares the number
Expand Down
12 changes: 6 additions & 6 deletions sycl/test/check_device_code/vector/vector_math_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,26 +341,26 @@ SYCL_EXTERNAL auto TestNegation(vec<ext::oneapi::bfloat16, 3> a) { return !a; }
// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca float, align 4
// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]])
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I]])
// CHECK-NEXT: [[REF_TMP_ASCAST_I:%.*]] = addrspacecast ptr [[REF_TMP_I]] to ptr addrspace(4)
// CHECK-NEXT: tail call void @llvm.memset.p4.i64(ptr addrspace(4) noundef align 32 dereferenceable(32) [[AGG_RESULT]], i8 0, i64 32, i1 false), !alias.scope [[META100]]
// CHECK-NEXT: [[REF_TMP_ASCAST_I:%.*]] = addrspacecast ptr [[REF_TMP_I]] to ptr addrspace(4)
// CHECK-NEXT: br label [[FOR_COND_I:%.*]]
// CHECK: for.cond.i:
// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i64 [[I_0_I]], 16
// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]]
// CHECK: for.body.i:
// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]]
// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META100]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I]])
// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META103:![0-9]+]]
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg float [[CALL_I_I_I]]
// CHECK-NEXT: store float [[FNEG_I]], ptr [[REF_TMP_I]], align 4, !tbaa [[TBAA47]], !noalias [[META100]]
// CHECK-NEXT: store float [[FNEG_I]], ptr [[REF_TMP_I]], align 4, !tbaa [[TBAA47]], !noalias [[META103]]
// CHECK-NEXT: [[CALL_I_I10_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I]]) #[[ATTR8]], !noalias [[META103]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I]])
// CHECK-NEXT: [[ARRAYIDX_I_I_I9_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]]
// CHECK-NEXT: [[CALL_I_I10_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I]]) #[[ATTR8]], !noalias [[META100]]
// CHECK-NEXT: store i16 [[CALL_I_I10_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I9_I]], align 2, !tbaa [[TBAA103:![0-9]+]], !alias.scope [[META100]]
// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1
// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP105:![0-9]+]]
// CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I]])
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL auto TestMinus(vec<ext::oneapi::bfloat16, 16> a) { return -a; }

0 comments on commit 53c68cd

Please sign in to comment.