diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..e24ff034 --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +.klocwork/ +.vs/ +.vscode/ +_bin/ +_bin32/ +_bin64/ +build/ +build32/ +build64/ +Builds/ +config/*.png +install/ +Makefile +*~ +*.aps +*.bmp +*.ncb +*.opensdf +*.pal +*.sdf +*.sln +*.suo +*.vcproj +*.vcxproj +*.vcxproj.filters +*.vcxproj.user +*.xcf + diff --git a/Android.mk b/Android.mk new file mode 100644 index 00000000..63d82a7e --- /dev/null +++ b/Android.mk @@ -0,0 +1,23 @@ +LOCAL_PATH := $(call my-dir) + +#================# + +include $(CLEAR_VARS) + +APP_STL := stlport_static + +LOCAL_MODULE := clIntercept + +#LOCAL_CPPFLAGS += -std=c++11 + +LOCAL_SHARED_LIBRARIES := libdl liblog +LOCAL_SRC_FILES += Src/dispatch.cpp Src/enummap.cpp Src/intercept.cpp Src/main.cpp Src/stubs.cpp +LOCAL_SRC_FILES += OS/OS_linux_common.cpp OS/OS_linux.cpp + +LOCAL_C_INCLUDES += $(LOCAL_PATH)/Src + + +include external/libcxx/libcxx.mk + +include $(BUILD_SHARED_LIBRARY) + diff --git a/CL/cl.h b/CL/cl.h new file mode 100644 index 00000000..f217e554 --- /dev/null +++ b/CL/cl.h @@ -0,0 +1,1478 @@ +/******************************************************************************* + * Copyright (c) 2008-2015 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +// Unlike the Khronos header file, we want to unconditionally include the +// CLIntercept cl_platform.h, and not the system cl_platform.h. +#if 0 +#ifdef __APPLE__ +#include +#else +#include +#endif +#else +#include "CL/cl_platform.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. 
*/ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +typedef cl_bitfield cl_device_svm_capabilities; +typedef cl_bitfield cl_command_queue_properties; +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +typedef cl_bitfield cl_queue_properties; +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +typedef cl_bitfield cl_svm_mem_flags; +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +typedef cl_bitfield cl_mem_migration_flags; +typedef cl_uint cl_image_info; +typedef cl_uint cl_buffer_create_type; +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +typedef intptr_t cl_pipe_properties; +typedef cl_uint cl_pipe_info; +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +typedef cl_uint cl_program_binary_type; +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +typedef cl_uint cl_kernel_work_group_info; +typedef cl_uint cl_kernel_sub_group_info; +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; +typedef cl_bitfield cl_sampler_properties; +typedef cl_uint cl_kernel_exec_info; + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +typedef 
struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; + cl_mem mem_object; +} cl_image_desc; + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 
+#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#define CL_INVALID_PROPERTY -64 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 +#define CL_INVALID_PIPE_SIZE -69 +#define CL_INVALID_DEVICE_QUEUE -70 +#define CL_INVALID_SPEC_ID -71 +#define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72 + +/* OpenCL Version */ +#define CL_VERSION_1_0 1 +#define CL_VERSION_1_1 1 +#define CL_VERSION_1_2 1 +#define CL_VERSION_2_0 1 +#define CL_VERSION_2_1 1 +#define CL_VERSION_2_2 1 + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 +#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ +#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#define CL_DEVICE_DOUBLE_FP_CONFIG 
0x1032 +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B +#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C +#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D +#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E +#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F +#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 +#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 +#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 +#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 +#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 +#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 +#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 +#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 +#define 
CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A +#define CL_DEVICE_IL_VERSION 0x105B +#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C +#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#define CL_FP_SOFT_FLOAT (1 << 6) +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) +#define CL_QUEUE_ON_DEVICE (1 << 2) +#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#define CL_CONTEXT_NUM_DEVICES 0x1083 + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +/* 
cl_device_svm_capabilities */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#define CL_QUEUE_SIZE 0x1094 +#define CL_QUEUE_DEVICE_DEFAULT 0x1095 + +/* cl_mem_flags and cl_svm_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +/* reserved (1 << 6) */ +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ +#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ +#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE +#define CL_sRGB 0x10BF +#define CL_sRGBx 0x10C0 +#define CL_sRGBA 0x10C1 +#define CL_sBGRA 0x10C2 +#define CL_ABGR 0x10C3 + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define 
CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#define CL_UNORM_INT24 0x10DF +#define CL_UNORM_INT_101010_2 0x10E0 + +/* cl_mem_object_type */ +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 +#define CL_MEM_OBJECT_PIPE 0x10F7 + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 +#define CL_MEM_USES_SVM_POINTER 0x1109 + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A + +/* cl_pipe_info */ +#define CL_PIPE_PACKET_SIZE 0x1120 +#define CL_PIPE_MAX_PACKETS 0x1121 + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 
+#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 +#define CL_SAMPLER_MIP_FILTER_MODE 0x1155 +#define CL_SAMPLER_LOD_MIN 0x1156 +#define CL_SAMPLER_LOD_MAX 0x1157 + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 +#define CL_PROGRAM_IL 0x1169 +#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A +#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#define CL_KERNEL_ATTRIBUTES 0x1195 +#define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9 +#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 
+#define CL_KERNEL_ARG_NAME 0x119A + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +/* cl_kernel_arg_type_qualifier - bitfield */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034 +#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 + +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#define CL_EVENT_CONTEXT 0x11D4 + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define 
CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 +#define CL_COMMAND_SVM_FREE 0x1209 +#define CL_COMMAND_SVM_MEMCPY 0x120A +#define CL_COMMAND_SVM_MEMFILL 0x120B +#define CL_COMMAND_SVM_MAP 0x120C +#define CL_COMMAND_SVM_UNMAP 0x120D + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +/* cl_buffer_create_type */ +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +/* cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 +#define CL_PROFILING_COMMAND_COMPLETE 0x1284 + +/********************************************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id /* platform */, + cl_platform_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id /* platform */, + cl_device_type /* 
device_type */, + cl_uint /* num_entries */, + cl_device_id * /* devices */, + cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id /* device */, + cl_device_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id /* in_device */, + const cl_device_partition_property * /* properties */, + cl_uint /* num_devices */, + cl_device_id * /* out_devices */, + cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetDefaultDeviceCommandQueue(cl_context /* context */, + cl_device_id /* device */, + cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceAndHostTimer(cl_device_id /* device */, + cl_ulong* /* device_timestamp */, + cl_ulong* /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetHostTimer(cl_device_id /* device */, + cl_ulong* /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1; + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * /* properties */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, + void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (CL_CALLBACK * /* pfn_notify*/ )(const char 
*, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context /* context */, + cl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithProperties(cl_context /* context */, + cl_device_id /* device */, + const cl_queue_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue /* command_queue */, + cl_command_queue_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + size_t /* size */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem /* buffer */, + cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* buffer_create_info */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context /* context */, + 
cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + const cl_image_desc * /* image_desc */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreatePipe(cl_context /* context */, + cl_mem_flags /* flags */, + cl_uint /* pipe_packet_size */, + cl_uint /* pipe_max_packets */, + const cl_pipe_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context /* context */, + cl_mem_flags /* flags */, + cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, + cl_image_format * /* image_formats */, + cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem /* memobj */, + cl_mem_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem /* image */, + cl_image_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPipeInfo(cl_mem /* pipe */, + cl_pipe_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback(cl_mem /* memobj */, + void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), + void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; + +/* SVM Allocation APIs */ +extern 
CL_API_ENTRY void * CL_API_CALL +clSVMAlloc(cl_context /* context */, + cl_svm_mem_flags /* flags */, + size_t /* size */, + cl_uint /* alignment */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFree(cl_context /* context */, + void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; + +/* Sampler APIs */ +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSamplerWithProperties(cl_context /* context */, + const cl_sampler_properties * /* sampler_properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler /* sampler */, + cl_sampler_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context /* context */, + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* kernel_names */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context /* context */, + 
const void* /* il */, + size_t /* length */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_headers */, + const cl_program * /* input_headers */, + const char ** /* header_include_names */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_programs */, + const cl_program * /* input_programs */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */, + cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramReleaseCallback(cl_program /* program */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_2_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramSpecializationConstant(cl_program /* program */, + cl_uint /* spec_id */, + size_t /* spec_size */, + const void* /* spec_value */) 
CL_API_SUFFIX__VERSION_2_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program /* program */, + cl_program_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program /* program */, + const char * /* kernel_name */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program /* program */, + cl_uint /* num_kernels */, + cl_kernel * /* kernels */, + cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCloneKernel(cl_kernel /* source_kernel */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel /* kernel */, + cl_uint /* arg_index */, + size_t /* arg_size */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointer(cl_kernel /* kernel */, + cl_uint /* arg_index */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfo(cl_kernel /* kernel */, + cl_kernel_exec_info /* param_name */, + size_t /* 
param_value_size */, + const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel /* kernel */, + cl_kernel_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel /* kernel */, + cl_uint /* arg_indx */, + cl_kernel_arg_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfo(cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_sub_group_info /* param_name */, + size_t /* input_value_size */, + const void* /*input_value */, + size_t /* param_value_size */, + void* /* param_value */, + size_t* /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_2_1; + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event /* event */, + cl_event_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context /* context */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event 
/* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event /* event */, + cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback( cl_event /* event */, + cl_int /* command_exec_callback_type */, + void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event /* event */, + cl_profiling_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + size_t /* offset */, + size_t /* size */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue 
/* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + size_t /* offset */, + size_t /* size */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + const void * /* pattern */, + size_t /* pattern_size */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + size_t /* src_offset */, + size_t /* dst_offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin */, + const size_t * /* dst_origin */, + const size_t * /* region */, + size_t /* src_row_pitch */, + size_t /* src_slice_pitch */, + size_t /* dst_row_pitch */, + size_t /* dst_slice_pitch */, + cl_uint /* num_events_in_wait_list */, 
+ const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_read */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* row_pitch */, + size_t /* slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_write */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + const void * /* fill_color */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_image */, + const size_t * /* src_origin[3] */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin[3] */, + const size_t * /* region[3] */, + size_t /* dst_offset */, 
+ cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_image */, + size_t /* src_offset */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t * /* image_row_pitch */, + size_t * /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, + cl_mem /* memobj */, + void * /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_objects */, + cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* 
event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue /* command_queue */, + void (CL_CALLBACK * /*user_func*/)(void *), + void * /* args */, + size_t /* cb_args */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_list */, + const void ** /* args_mem_loc */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFree(cl_command_queue /* command_queue */, + cl_uint /* num_svm_pointers */, + void *[] /* svm_pointers[] */, + void (CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void *[] /* svm_pointers[] */, + void * /* user_data */), + void * /* user_data */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpy(cl_command_queue /* 
command_queue */, + cl_bool /* blocking_copy */, + void * /* dst_ptr */, + const void * /* src_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFill(cl_command_queue /* command_queue */, + void * /* svm_ptr */, + const void * /* pattern */, + size_t /* pattern_size */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMap(cl_command_queue /* command_queue */, + cl_bool /* blocking_map */, + cl_map_flags /* flags */, + void * /* svm_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmap(cl_command_queue /* command_queue */, + void * /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMigrateMem(cl_command_queue /* command_queue */, + cl_uint /* num_svm_pointers */, + const void** /* svm_pointers */, + const size_t* /* sizes */, + cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_2_1; + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. 
+ */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */, + const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2; + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + /* + * WARNING: + * This API introduces mutable state into the OpenCL implementation. It has been REMOVED + * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the + * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. + * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. + * + * Software developers previously relying on this API are instructed to set the command queue + * properties when creating the queue, instead. + */ + extern CL_API_ENTRY cl_int CL_API_CALL + clSetCommandQueueProperty(cl_command_queue /* command_queue */, + cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED 
cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue /* command_queue */, + cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* Deprecated OpenCL 1.2 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context /* context */, + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +clCreateSampler(cl_context /* context */, + cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, + cl_filter_mode /* filter_mode */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +clEnqueueTask(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ + diff --git a/CL/cl_gl.h b/CL/cl_gl.h new file mode 100644 index 00000000..14218d0c --- /dev/null +++ b/CL/cl_gl.h 
@@ -0,0 +1,173 @@ +/********************************************************************************** + * Copyright (c) 2008-2015 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +// Unlike the Khronos header file, we want to unconditionally include the +// CLIntercept cl.h, and not the system cl.h. 
+#if 0 +#ifdef __APPLE__ +#include +#else +#include +#endif +#else +#include "CL/cl.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; +typedef struct __GLsync *cl_GLsync; + +/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 +#define CL_GL_NUM_SAMPLES 0x2012 + + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /* bufobj */, + int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLuint /* renderbuffer */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem /* memobj */, + cl_gl_object_type * /* gl_object_type */, + cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem /* memobj */, + cl_gl_texture_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL 
+clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, + cl_uint /* num_objects */, + const cl_mem * /* mem_objects */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture3D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* cl_khr_gl_sharing extension */ + +#define cl_khr_gl_sharing 1 + +typedef cl_uint cl_gl_context_info; + +/* Additional Error Codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define CL_CGL_SHAREGROUP_KHR 0x200C + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR(const cl_context_properties * /* properties */, + cl_gl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* 
param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_H */ diff --git a/CL/cl_platform.h b/CL/cl_platform.h new file mode 100644 index 00000000..873f837e --- /dev/null +++ b/CL/cl_platform.h @@ -0,0 +1,1395 @@ +/********************************************************************************** + * Copyright (c) 2008-2015 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#ifdef __APPLE__ + /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ + #include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #define CL_API_ENTRY + #define CL_API_CALL __stdcall + #define CL_CALLBACK __stdcall +#else + #define CL_API_ENTRY + #define CL_API_CALL + #define CL_CALLBACK +#endif + +/* + * Deprecation flags refer to the last version of the header in which the + * feature was not deprecated. + * + * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without + * deprecation but is deprecated in versions later than 1.1. + */ + +#ifdef __APPLE__ + #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) + #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 + + #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK 
AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 + #else + #warning This path should never happen outside of internal operating system development. AvailabilityMacros do not function correctly here! + #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #endif + + // These aren't in the Khronos header file, but are needed to avoid + // build errors on OSX. + #define CL_API_SUFFIX__VERSION_2_0 + #define CL_EXT_SUFFIX__VERSION_2_0 + #define CL_API_SUFFIX__VERSION_2_1 + #define CL_EXT_SUFFIX__VERSION_2_1 + #define CL_API_SUFFIX__VERSION_2_2 + #define CL_EXT_SUFFIX__VERSION_2_2 + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED +#else + #define CL_EXTENSION_WEAK_LINK + #define CL_API_SUFFIX__VERSION_1_0 + #define CL_EXT_SUFFIX__VERSION_1_0 + #define CL_API_SUFFIX__VERSION_1_1 + #define CL_EXT_SUFFIX__VERSION_1_1 + #define CL_API_SUFFIX__VERSION_1_2 + #define CL_EXT_SUFFIX__VERSION_1_2 + #define CL_API_SUFFIX__VERSION_2_0 + #define CL_EXT_SUFFIX__VERSION_2_0 + #define CL_API_SUFFIX__VERSION_2_1 + #define CL_EXT_SUFFIX__VERSION_2_1 + #define CL_API_SUFFIX__VERSION_2_2 + #define CL_EXT_SUFFIX__VERSION_2_2 + + #ifdef __GNUC__ + #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + 
#endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED + #endif + #elif defined(_WIN32) + #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated) + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated) + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED __declspec(deprecated) + #endif + + #ifdef 
CL_USE_DEPRECATED_OPENCL_2_0_APIS
+            #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED
+        #else /* opt-in macro absent: the prefix carries __declspec(deprecated) */
+            #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED __declspec(deprecated)
+        #endif
+
+        #ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS /* caller opted in: both markers expand to nothing */
+            #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED
+        #else
+            #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED __declspec(deprecated)
+        #endif
+    #else /* neither __GNUC__ nor _WIN32: every deprecation marker expands to nothing */
+        #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+        #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+
+        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+
+        #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED
+        #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED
+
+        #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED
+        #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED
+
+        #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED
+        #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED
+    #endif
+#endif
+
+#if (defined (_WIN32) && defined(_MSC_VER)) /* MSVC branch: widths come from the __intN keywords */
+
+/* scalar types
+ * Host-side typedefs with explicit widths: 8/16/32/64-bit signed and unsigned
+ * integers, cl_half as an unsigned 16-bit storage type, and cl_float/cl_double
+ * as the native float/double.
+ */
+typedef signed   __int8         cl_char;
+typedef unsigned __int8         cl_uchar;
+typedef signed   __int16        cl_short;
+typedef unsigned __int16        cl_ushort;
+typedef signed   __int32        cl_int;
+typedef unsigned __int32        cl_uint;
+typedef signed   __int64        cl_long;
+typedef unsigned __int64        cl_ulong;
+
+typedef unsigned __int16        cl_half;
+typedef float                   cl_float;
+typedef double                  cl_double;
+
+/* Macro names and corresponding values defined by OpenCL
+ * Limits of the integer typedefs above; the MIN values are written as
+ * (-MAX-1) expressions.
+ */
+#define CL_CHAR_BIT         8
+#define CL_SCHAR_MAX        127
+#define CL_SCHAR_MIN        (-127-1)
+#define CL_CHAR_MAX         CL_SCHAR_MAX
+#define CL_CHAR_MIN         CL_SCHAR_MIN
+#define CL_UCHAR_MAX        255
+#define CL_SHRT_MAX         32767
+#define CL_SHRT_MIN         (-32767-1)
+#define CL_USHRT_MAX        65535
+#define CL_INT_MAX          2147483647
+#define CL_INT_MIN          (-2147483647-1)
+#define CL_UINT_MAX         0xffffffffU
+#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN         ((cl_long)
-0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+/* CL_FLT_*: characteristics of cl_float (MSVC branch); the names parallel the
+ * FLT_* macros of <float.h>. */
+#define CL_FLT_DIG          6
+#define CL_FLT_MANT_DIG     24
+#define CL_FLT_MAX_10_EXP   +38
+#define CL_FLT_MAX_EXP      +128
+#define CL_FLT_MIN_10_EXP   -37
+#define CL_FLT_MIN_EXP      -125
+#define CL_FLT_RADIX        2
+#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
+#define CL_FLT_MIN          1.175494350822287507969e-38f
+#define CL_FLT_EPSILON      1.1920928955078125e-7f
+/* CL_HALF_*: characteristics of the 16-bit half format stored in cl_half;
+ * the values are written as float literals. */
+#define CL_HALF_DIG          3
+#define CL_HALF_MANT_DIG     11
+#define CL_HALF_MAX_10_EXP   +4
+#define CL_HALF_MAX_EXP      +16
+#define CL_HALF_MIN_10_EXP   -4
+#define CL_HALF_MIN_EXP      -13
+#define CL_HALF_RADIX        2
+#define CL_HALF_MAX          65504.0f
+#define CL_HALF_MIN          6.103515625e-05f
+#define CL_HALF_EPSILON      9.765625e-04f
+/* CL_DBL_*: characteristics of cl_double. */
+#define CL_DBL_DIG          15
+#define CL_DBL_MANT_DIG     53
+#define CL_DBL_MAX_10_EXP   +308
+#define CL_DBL_MAX_EXP      +1024
+#define CL_DBL_MIN_10_EXP   -307
+#define CL_DBL_MIN_EXP      -1021
+#define CL_DBL_RADIX        2
+#define CL_DBL_MAX          179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
+#define CL_DBL_MIN          2.225073858507201383090e-308
+#define CL_DBL_EPSILON      2.220446049250313080847e-16
+/* CL_M_*: math constants as double literals (e, logarithms, multiples and
+ * reciprocals of pi, square roots of 2 and 1/2). */
+#define CL_M_E              2.718281828459045090796
+#define CL_M_LOG2E          1.442695040888963387005
+#define CL_M_LOG10E         0.434294481903251816668
+#define CL_M_LN2            0.693147180559945286227
+#define CL_M_LN10           2.302585092994045901094
+#define CL_M_PI             3.141592653589793115998
+#define CL_M_PI_2           1.570796326794896557999
+#define CL_M_PI_4           0.785398163397448278999
+#define CL_M_1_PI           0.318309886183790691216
+#define CL_M_2_PI           0.636619772367581382433
+#define CL_M_2_SQRTPI       1.128379167095512558561
+#define CL_M_SQRT2          1.414213562373095145475
+#define CL_M_SQRT1_2        0.707106781186547572737
+/* CL_M_*_F: the same constants as float literals. */
+#define CL_M_E_F            2.71828174591064f
+#define
CL_M_LOG2E_F 1.44269502162933f +#define CL_M_LOG10E_F 0.43429449200630f +#define CL_M_LN2_F 0.69314718246460f +#define CL_M_LN10_F 2.30258512496948f +#define CL_M_PI_F 3.14159274101257f +#define CL_M_PI_2_F 1.57079637050629f +#define CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short __attribute__((aligned(2))); +typedef uint16_t cl_ushort __attribute__((aligned(2))); +typedef int32_t cl_int __attribute__((aligned(4))); +typedef uint32_t cl_uint __attribute__((aligned(4))); +typedef int64_t cl_long __attribute__((aligned(8))); +typedef uint64_t cl_ulong __attribute__((aligned(8))); + +typedef uint16_t cl_half __attribute__((aligned(2))); +typedef float cl_float __attribute__((aligned(4))); +typedef double cl_double __attribute__((aligned(8))); + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define 
CL_FLT_MIN_EXP      -125
+#define CL_FLT_RADIX        2
+#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
+#define CL_FLT_MIN          1.175494350822287507969e-38f
+#define CL_FLT_EPSILON      1.1920928955078125e-7f
+/* CL_HALF_*: characteristics of the 16-bit half format stored in cl_half;
+ * the values are written as float literals. */
+#define CL_HALF_DIG          3
+#define CL_HALF_MANT_DIG     11
+#define CL_HALF_MAX_10_EXP   +4
+#define CL_HALF_MAX_EXP      +16
+#define CL_HALF_MIN_10_EXP   -4
+#define CL_HALF_MIN_EXP      -13
+#define CL_HALF_RADIX        2
+#define CL_HALF_MAX          65504.0f
+#define CL_HALF_MIN          6.103515625e-05f
+#define CL_HALF_EPSILON      9.765625e-04f
+/* CL_DBL_*: characteristics of cl_double. */
+#define CL_DBL_DIG          15
+#define CL_DBL_MANT_DIG     53
+#define CL_DBL_MAX_10_EXP   +308
+#define CL_DBL_MAX_EXP      +1024
+#define CL_DBL_MIN_10_EXP   -307
+#define CL_DBL_MIN_EXP      -1021
+#define CL_DBL_RADIX        2
+#define CL_DBL_MAX          179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
+#define CL_DBL_MIN          2.225073858507201383090e-308
+#define CL_DBL_EPSILON      2.220446049250313080847e-16
+/* CL_M_*: math constants as double literals (e, logarithms, multiples and
+ * reciprocals of pi, square roots of 2 and 1/2). */
+#define CL_M_E              2.718281828459045090796
+#define CL_M_LOG2E          1.442695040888963387005
+#define CL_M_LOG10E         0.434294481903251816668
+#define CL_M_LN2            0.693147180559945286227
+#define CL_M_LN10           2.302585092994045901094
+#define CL_M_PI             3.141592653589793115998
+#define CL_M_PI_2           1.570796326794896557999
+#define CL_M_PI_4           0.785398163397448278999
+#define CL_M_1_PI           0.318309886183790691216
+#define CL_M_2_PI           0.636619772367581382433
+#define CL_M_2_SQRTPI       1.128379167095512558561
+#define CL_M_SQRT2          1.414213562373095145475
+#define CL_M_SQRT1_2        0.707106781186547572737
+/* CL_M_*_F: the same constants as float literals. */
+#define CL_M_E_F            2.71828174591064f
+#define CL_M_LOG2E_F        1.44269502162933f
+#define CL_M_LOG10E_F       0.43429449200630f
+#define CL_M_LN2_F          0.69314718246460f
+#define CL_M_LN10_F         2.30258512496948f
+#define CL_M_PI_F           3.14159274101257f
+#define CL_M_PI_2_F         1.57079637050629f
+#define
CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. 
*/ + typedef vector unsigned char __cl_uchar16; + typedef vector signed char __cl_char16; + typedef vector unsigned short __cl_ushort8; + typedef vector signed short __cl_short8; + typedef vector unsigned int __cl_uint4; + typedef vector signed int __cl_int4; + typedef vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if 
defined( __MMX__ ) + #include + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define __CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define capabilities for anonymous struct members. */ +#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >= 1500 + /* Microsoft Developer Studio 2008 supports anonymous structs, but + * complains by default. 
*/ + #define __CL_HAS_ANON_STRUCT__ 1 + #define __CL_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless + * struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) + #endif +#else +#define __CL_HAS_ANON_STRUCT__ 0 +#define __CL_ANON_STRUCT__ +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if __CL_HAS_ANON_STRUCT__ + /* .xyzw and .s0123...{f|F} are supported */ + #define CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. 
*/ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif +}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __cl_uchar2__) + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. 
*/
+typedef  cl_uchar4  cl_uchar3;
+
+typedef union /* 8 x cl_uchar; only the first four elements get x/y/z/w names */
+{
+    cl_uchar   CL_ALIGNED(8) s[8];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+    __cl_uchar2     v2[4];
+#endif
+#if defined( __CL_UCHAR4__)
+    __cl_uchar4     v4[2];
+#endif
+#if defined( __CL_UCHAR8__ )
+    __cl_uchar8     v8;
+#endif
+}cl_uchar8;
+
+typedef union /* 16 x cl_uchar; __spacerN pads the first struct so sa..sf land on elements 10..15 */
+{
+    cl_uchar  CL_ALIGNED(16) s[16];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+    __cl_uchar2     v2[8];
+#endif
+#if defined( __CL_UCHAR4__)
+    __cl_uchar4     v4[4];
+#endif
+#if defined( __CL_UCHAR8__ )
+    __cl_uchar8     v8[2];
+#endif
+#if defined( __CL_UCHAR16__ )
+    __cl_uchar16    v16;
+#endif
+}cl_uchar16;
+
+
+/* ---- cl_shortn ----
+ * 2-, 4-, 8- and 16-element cl_short vectors.  Each union overlays the array
+ * s[] with named components and lo/hi halves (only when
+ * __CL_HAS_ANON_STRUCT__ is non-zero) and with native vN views (only when the
+ * matching __CL_SHORTn__ macro is defined).  CL_ALIGNED is given the vector's
+ * total size in bytes.
+ */
+typedef union /* 2 x cl_short */
+{
+    cl_short  CL_ALIGNED(4) s[2];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_short  x, y; };
+   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1; };
+   __CL_ANON_STRUCT__ struct{ cl_short  lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+    __cl_short2     v2;
+#endif
+}cl_short2;
+
+typedef union /* 4 x cl_short */
+{
+    cl_short  CL_ALIGNED(8) s[4];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3; };
+   __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+    __cl_short2     v2[2];
+#endif
+#if defined( __CL_SHORT4__)
+    __cl_short4     v4;
+#endif
+}cl_short4;
+
+/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5.
*/
+typedef  cl_short4  cl_short3;
+
+typedef union /* 8 x cl_short; only the first four elements get x/y/z/w names */
+{
+    cl_short   CL_ALIGNED(16) s[8];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+    __cl_short2     v2[4];
+#endif
+#if defined( __CL_SHORT4__)
+    __cl_short4     v4[2];
+#endif
+#if defined( __CL_SHORT8__ )
+    __cl_short8     v8;
+#endif
+}cl_short8;
+
+typedef union /* 16 x cl_short; __spacerN pads the first struct so sa..sf land on elements 10..15 */
+{
+    cl_short  CL_ALIGNED(32) s[16];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+    __cl_short2     v2[8];
+#endif
+#if defined( __CL_SHORT4__)
+    __cl_short4     v4[4];
+#endif
+#if defined( __CL_SHORT8__ )
+    __cl_short8     v8[2];
+#endif
+#if defined( __CL_SHORT16__ )
+    __cl_short16    v16;
+#endif
+}cl_short16;
+
+
+/* ---- cl_ushortn ----
+ * 2-, 4-, 8- and 16-element cl_ushort vectors.  Each union overlays the array
+ * s[] with named components and lo/hi halves (only when
+ * __CL_HAS_ANON_STRUCT__ is non-zero) and with native vN views (only when the
+ * matching __CL_USHORTn__ macro is defined).  CL_ALIGNED is given the vector's
+ * total size in bytes.
+ */
+typedef union /* 2 x cl_ushort */
+{
+    cl_ushort  CL_ALIGNED(4) s[2];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y; };
+   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1; };
+   __CL_ANON_STRUCT__ struct{ cl_ushort  lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+    __cl_ushort2     v2;
+#endif
+}cl_ushort2;
+
+typedef union /* 4 x cl_ushort */
+{
+    cl_ushort  CL_ALIGNED(8) s[4];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3; };
+   __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+    __cl_ushort2     v2[2];
+#endif
+#if defined( __CL_USHORT4__)
+    __cl_ushort4     v4;
+#endif
+}cl_ushort4;
+
+/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5.
*/
+typedef  cl_ushort4  cl_ushort3;
+
+typedef union /* 8 x cl_ushort; only the first four elements get x/y/z/w names */
+{
+    cl_ushort   CL_ALIGNED(16) s[8];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+    __cl_ushort2     v2[4];
+#endif
+#if defined( __CL_USHORT4__)
+    __cl_ushort4     v4[2];
+#endif
+#if defined( __CL_USHORT8__ )
+    __cl_ushort8     v8;
+#endif
+}cl_ushort8;
+
+typedef union /* 16 x cl_ushort; __spacerN pads the first struct so sa..sf land on elements 10..15 */
+{
+    cl_ushort  CL_ALIGNED(32) s[16];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+    __cl_ushort2     v2[8];
+#endif
+#if defined( __CL_USHORT4__)
+    __cl_ushort4     v4[4];
+#endif
+#if defined( __CL_USHORT8__ )
+    __cl_ushort8     v8[2];
+#endif
+#if defined( __CL_USHORT16__ )
+    __cl_ushort16    v16;
+#endif
+}cl_ushort16;
+
+/* ---- cl_intn ----
+ * 2-, 4-, 8- and 16-element cl_int vectors.  Each union overlays the array
+ * s[] with named components and lo/hi halves (only when
+ * __CL_HAS_ANON_STRUCT__ is non-zero) and with native vN views (only when the
+ * matching __CL_INTn__ macro is defined).  CL_ALIGNED is given the vector's
+ * total size in bytes.
+ */
+typedef union /* 2 x cl_int */
+{
+    cl_int  CL_ALIGNED(8) s[2];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_int  x, y; };
+   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1; };
+   __CL_ANON_STRUCT__ struct{ cl_int  lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+    __cl_int2     v2;
+#endif
+}cl_int2;
+
+typedef union /* 4 x cl_int */
+{
+    cl_int  CL_ALIGNED(16) s[4];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3; };
+   __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+    __cl_int2     v2[2];
+#endif
+#if defined( __CL_INT4__)
+    __cl_int4     v4;
+#endif
+}cl_int4;
+
+/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5.
*/
+typedef  cl_int4  cl_int3;
+
+typedef union /* 8 x cl_int; only the first four elements get x/y/z/w names */
+{
+    cl_int   CL_ALIGNED(32) s[8];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+    __cl_int2     v2[4];
+#endif
+#if defined( __CL_INT4__)
+    __cl_int4     v4[2];
+#endif
+#if defined( __CL_INT8__ )
+    __cl_int8     v8;
+#endif
+}cl_int8;
+
+typedef union /* 16 x cl_int; __spacerN pads the first struct so sa..sf land on elements 10..15 */
+{
+    cl_int  CL_ALIGNED(64) s[16];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+    __cl_int2     v2[8];
+#endif
+#if defined( __CL_INT4__)
+    __cl_int4     v4[4];
+#endif
+#if defined( __CL_INT8__ )
+    __cl_int8     v8[2];
+#endif
+#if defined( __CL_INT16__ )
+    __cl_int16    v16;
+#endif
+}cl_int16;
+
+
+/* ---- cl_uintn ----
+ * 2-, 4-, 8- and 16-element cl_uint vectors.  Each union overlays the array
+ * s[] with named components and lo/hi halves (only when
+ * __CL_HAS_ANON_STRUCT__ is non-zero) and with native vN views (only when the
+ * matching __CL_UINTn__ macro is defined).  CL_ALIGNED is given the vector's
+ * total size in bytes.
+ */
+typedef union /* 2 x cl_uint */
+{
+    cl_uint  CL_ALIGNED(8) s[2];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_uint  x, y; };
+   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1; };
+   __CL_ANON_STRUCT__ struct{ cl_uint  lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+    __cl_uint2     v2;
+#endif
+}cl_uint2;
+
+typedef union /* 4 x cl_uint */
+{
+    cl_uint  CL_ALIGNED(16) s[4];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3; };
+   __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+    __cl_uint2     v2[2];
+#endif
+#if defined( __CL_UINT4__)
+    __cl_uint4     v4;
+#endif
+}cl_uint4;
+
+/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5.
*/
+typedef  cl_uint4  cl_uint3;
+
+typedef union /* 8 x cl_uint; only the first four elements get x/y/z/w names */
+{
+    cl_uint   CL_ALIGNED(32) s[8];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+    __cl_uint2     v2[4];
+#endif
+#if defined( __CL_UINT4__)
+    __cl_uint4     v4[2];
+#endif
+#if defined( __CL_UINT8__ )
+    __cl_uint8     v8;
+#endif
+}cl_uint8;
+
+typedef union /* 16 x cl_uint; __spacerN pads the first struct so sa..sf land on elements 10..15 */
+{
+    cl_uint  CL_ALIGNED(64) s[16];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+    __cl_uint2     v2[8];
+#endif
+#if defined( __CL_UINT4__)
+    __cl_uint4     v4[4];
+#endif
+#if defined( __CL_UINT8__ )
+    __cl_uint8     v8[2];
+#endif
+#if defined( __CL_UINT16__ )
+    __cl_uint16    v16;
+#endif
+}cl_uint16;
+
+/* ---- cl_longn ----
+ * 2-, 4-, 8- and 16-element cl_long vectors.  Each union overlays the array
+ * s[] with named components and lo/hi halves (only when
+ * __CL_HAS_ANON_STRUCT__ is non-zero) and with native vN views (only when the
+ * matching __CL_LONGn__ macro is defined).  CL_ALIGNED is given the vector's
+ * total size in bytes.
+ */
+typedef union /* 2 x cl_long */
+{
+    cl_long  CL_ALIGNED(16) s[2];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_long  x, y; };
+   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1; };
+   __CL_ANON_STRUCT__ struct{ cl_long  lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+    __cl_long2     v2;
+#endif
+}cl_long2;
+
+typedef union /* 4 x cl_long */
+{
+    cl_long  CL_ALIGNED(32) s[4];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3; };
+   __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+    __cl_long2     v2[2];
+#endif
+#if defined( __CL_LONG4__)
+    __cl_long4     v4;
+#endif
+}cl_long4;
+
+/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5.
*/
+typedef  cl_long4  cl_long3;
+
+typedef union /* 8 x cl_long; only the first four elements get x/y/z/w names */
+{
+    cl_long   CL_ALIGNED(64) s[8];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+    __cl_long2     v2[4];
+#endif
+#if defined( __CL_LONG4__)
+    __cl_long4     v4[2];
+#endif
+#if defined( __CL_LONG8__ )
+    __cl_long8     v8;
+#endif
+}cl_long8;
+
+typedef union /* 16 x cl_long; __spacerN pads the first struct so sa..sf land on elements 10..15 */
+{
+    cl_long  CL_ALIGNED(128) s[16];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+    __cl_long2     v2[8];
+#endif
+#if defined( __CL_LONG4__)
+    __cl_long4     v4[4];
+#endif
+#if defined( __CL_LONG8__ )
+    __cl_long8     v8[2];
+#endif
+#if defined( __CL_LONG16__ )
+    __cl_long16    v16;
+#endif
+}cl_long16;
+
+
+/* ---- cl_ulongn ----
+ * 2-, 4-, 8- and 16-element cl_ulong vectors.  Each union overlays the array
+ * s[] with named components and lo/hi halves (only when
+ * __CL_HAS_ANON_STRUCT__ is non-zero) and with native vN views (only when the
+ * matching __CL_ULONGn__ macro is defined).  CL_ALIGNED is given the vector's
+ * total size in bytes.
+ */
+typedef union /* 2 x cl_ulong */
+{
+    cl_ulong  CL_ALIGNED(16) s[2];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y; };
+   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1; };
+   __CL_ANON_STRUCT__ struct{ cl_ulong  lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+    __cl_ulong2     v2;
+#endif
+}cl_ulong2;
+
+typedef union /* 4 x cl_ulong */
+{
+    cl_ulong  CL_ALIGNED(32) s[4];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3; };
+   __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+    __cl_ulong2     v2[2];
+#endif
+#if defined( __CL_ULONG4__)
+    __cl_ulong4     v4;
+#endif
+}cl_ulong4;
+
+/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5.
*/
+typedef  cl_ulong4  cl_ulong3;
+
+typedef union /* 8 x cl_ulong; only the first four elements get x/y/z/w names */
+{
+    cl_ulong   CL_ALIGNED(64) s[8];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+    __cl_ulong2     v2[4];
+#endif
+#if defined( __CL_ULONG4__)
+    __cl_ulong4     v4[2];
+#endif
+#if defined( __CL_ULONG8__ )
+    __cl_ulong8     v8;
+#endif
+}cl_ulong8;
+
+typedef union /* 16 x cl_ulong; __spacerN pads the first struct so sa..sf land on elements 10..15 */
+{
+    cl_ulong  CL_ALIGNED(128) s[16];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+    __cl_ulong2     v2[8];
+#endif
+#if defined( __CL_ULONG4__)
+    __cl_ulong4     v4[4];
+#endif
+#if defined( __CL_ULONG8__ )
+    __cl_ulong8     v8[2];
+#endif
+#if defined( __CL_ULONG16__ )
+    __cl_ulong16    v16;
+#endif
+}cl_ulong16;
+
+
+/* --- cl_floatn ----
+ * 2-, 4-, 8- and 16-element cl_float vectors.  Each union overlays the array
+ * s[] with named components and lo/hi halves (only when
+ * __CL_HAS_ANON_STRUCT__ is non-zero) and with native vN views (only when the
+ * matching __CL_FLOATn__ macro is defined).  CL_ALIGNED is given the vector's
+ * total size in bytes.
+ */
+
+typedef union /* 2 x cl_float */
+{
+    cl_float  CL_ALIGNED(8) s[2];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_float  x, y; };
+   __CL_ANON_STRUCT__ struct{ cl_float  s0, s1; };
+   __CL_ANON_STRUCT__ struct{ cl_float  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+    __cl_float2     v2;
+#endif
+}cl_float2;
+
+typedef union /* 4 x cl_float */
+{
+    cl_float  CL_ALIGNED(16) s[4];
+#if __CL_HAS_ANON_STRUCT__
+   __CL_ANON_STRUCT__ struct{ cl_float   x, y, z, w; };
+   __CL_ANON_STRUCT__ struct{ cl_float   s0, s1, s2, s3; };
+   __CL_ANON_STRUCT__ struct{ cl_float2  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+    __cl_float2     v2[2];
+#endif
+#if defined( __CL_FLOAT4__)
+    __cl_float4     v4;
+#endif
+}cl_float4;
+
+/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5.
*/ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. 
*/ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. 
+ */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#undef __CL_HAS_ANON_STRUCT__ +#undef __CL_ANON_STRUCT__ +#if defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >=1500 + #pragma warning( pop ) + #endif +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..cabd8b50 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,261 @@ +# Copyright (c) 2018 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +cmake_minimum_required(VERSION 3.1 FATAL_ERROR) + +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +set(CMAKE_CONFIGURATION_TYPES Debug Release) +set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) + +if (NOT CMAKE_BUILD_TYPE) + message(STATUS "No build type selected, default to Release") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build Type" FORCE) +endif() + +project(CLIntercept) + +find_package( Threads ) + +# This uses modules from: https://github.com/rpavlik/cmake-modules +# to get Git revision information and put it in the generated files: +# git_version.cpp - version information for CLIntercept log +# git_version.rc2 - DLL version information (Windows only) +add_definitions("-DCLINTERCEPT_CMAKE") +include(cmake_modules/GetGitRevisionDescription.cmake) +get_git_head_revision(GIT_REFSPEC GIT_SHA1) +git_describe(GIT_DESCRIBE) +configure_file(Src/git_version.cpp.in "${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp" @ONLY) +configure_file(Src/git_version.rc.in "${CMAKE_CURRENT_BINARY_DIR}/git_version.rc2" @ONLY) + +# Build the CLIntercept config app for 32-bit Windows builds, but not for 64-bit or other OSes. 
+if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + if(CMAKE_SIZEOF_VOID_P EQUAL 4) + add_subdirectory(config) + endif() +endif() + +include_directories( + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/Src +) +link_directories( +) + +set( CLINTERCEPT_OS_FILES + OS/OS.h +) +if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + list( APPEND CLINTERCEPT_OS_FILES + OS/OS_timer.h + OS/OS_windows.cpp + OS/OS_windows.h + OS/OS_windows_common.cpp + OS/OS_windows_common.h + ) +elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + list( APPEND CLINTERCEPT_OS_FILES + OS/OS_linux.cpp + OS/OS_linux.h + OS/OS_linux_common.cpp + OS/OS_linux_common.h + ) +elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") + list( APPEND CLINTERCEPT_OS_FILES + OS/OS_mac.cpp + OS/OS_mac.h + OS/OS_mac_common.cpp + OS/OS_mac_common.h + ) +endif() +source_group( OS FILES + ${CLINTERCEPT_OS_FILES} +) + +set( CLINTERCEPT_RESOURCE_FILES + Kernels/builtin_kernels.cl + Kernels/precompiled_kernels.cl + resource/clIntercept.rc + resource/clIntercept_resource.h + "${CMAKE_CURRENT_BINARY_DIR}/git_version.rc2" +) +source_group( Resources FILES + ${CLINTERCEPT_RESOURCE_FILES} +) +if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + set(CLINTERCEPT_KERNELS_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/Kernels) + #TODO: This is currently hard-coded for 64-bit Linux builds. 
+ set(CLINTERCEPT_KERNELS_OUTPUT_FORMAT elf64-x86-64) + add_custom_command(OUTPUT ${CLINTERCEPT_KERNELS_OUTPUT_DIRECTORY}/precompiled_kernels.o + COMMAND mkdir -p ${CLINTERCEPT_KERNELS_OUTPUT_DIRECTORY} + COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && objcopy -I binary -O ${CLINTERCEPT_KERNELS_OUTPUT_FORMAT} --binary-architecture i386 + Kernels/precompiled_kernels.cl + ${CLINTERCEPT_KERNELS_OUTPUT_DIRECTORY}/precompiled_kernels.o + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/Kernels/precompiled_kernels.cl + ) + add_custom_command(OUTPUT ${CLINTERCEPT_KERNELS_OUTPUT_DIRECTORY}/builtin_kernels.o + COMMAND mkdir -p ${CLINTERCEPT_KERNELS_OUTPUT_DIRECTORY} + COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && objcopy -I binary -O ${CLINTERCEPT_KERNELS_OUTPUT_FORMAT} --binary-architecture i386 + Kernels/builtin_kernels.cl + ${CLINTERCEPT_KERNELS_OUTPUT_DIRECTORY}/builtin_kernels.o + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/Kernels/builtin_kernels.cl + ) + list( APPEND CLINTERCEPT_RESOURCE_FILES + ${CLINTERCEPT_KERNELS_OUTPUT_DIRECTORY}/precompiled_kernels.o + ${CLINTERCEPT_KERNELS_OUTPUT_DIRECTORY}/builtin_kernels.o + ) +endif() + +set( CLINTERCEPT_SOURCE_FILES + Src/clIntercept.def + Src/clIntercept.map + Src/cli_ext.h + Src/common.h + Src/controls.h + Src/dispatch.cpp + Src/dispatch.h + Src/enummap.cpp + Src/enummap.h + Src/instrumentation.h + Src/intercept.cpp + Src/intercept.h + Src/main.cpp + Src/stubs.cpp + "${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp" +) +source_group( Source FILES + ${CLINTERCEPT_SOURCE_FILES} +) + +set( CLINTERCEPT_CL_HEADERS + CL/cl.h + CL/cl_gl.h + CL/cl_platform.h +) +source_group( CL FILES + ${CLINTERCEPT_CL_HEADERS} +) + +# MDAPI Support (optional) +set( ENABLE_MDAPI CACHE BOOL "Enable MDAPI Support" ) +if( ENABLE_MDAPI ) + add_definitions("-DUSE_MDAPI") + include_directories( embargo/mdapi ) + set( CLINTERCEPT_MDAPI_FILES + embargo/mdapi/DriverStorePath.h + embargo/mdapi/intercept_mdapi.cpp + embargo/mdapi/MetricsDiscoveryHelper.cpp + 
embargo/mdapi/MetricsDiscoveryHelper.h + embargo/mdapi/metrics_discovery_api.h + embargo/mdapi/windef4linux.h + ) + source_group( MDAPI FILES + ${CLINTERCEPT_MDAPI_FILES} + ) +endif() + +add_library( OpenCL SHARED + ${CLINTERCEPT_CL_HEADERS} + ${CLINTERCEPT_OS_FILES} + ${CLINTERCEPT_RESOURCE_FILES} + ${CLINTERCEPT_SOURCE_FILES} + ${CLINTERCEPT_MDAPI_FILES} +) +set_target_properties( OpenCL PROPERTIES VERSION "1.2" SOVERSION "1" ) +target_link_libraries( OpenCL ${CMAKE_DL_LIBS} ) + +if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") + target_link_libraries( OpenCL SetupAPI Shlwapi ) +elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + find_package( Threads ) + set_target_properties(OpenCL PROPERTIES COMPILE_FLAGS "-g -Wall -std=gnu++11") + set_target_properties(OpenCL PROPERTIES LINK_FLAGS "-Wl,--version-script -Wl,${CMAKE_SOURCE_DIR}/Src/clIntercept.map") + target_link_libraries( OpenCL ${CMAKE_THREAD_LIBS_INIT} ) +elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") + find_package(OpenCL REQUIRED) + set_target_properties(OpenCL PROPERTIES COMPILE_FLAGS "-g -Wall -arch i386 -arch x86_64") + target_link_libraries( OpenCL OpenCL::OpenCL ) +endif() + +# ITT Support (optional) +set( ENABLE_ITT CACHE BOOL "Enable ITT (Instrumentation Tracing Technology) API Support" ) +set( PROGRAMFILES_X86 "PROGRAMFILES(X86)" ) +find_path( VTUNE_INCLUDE_DIR ittnotify.h + HINTS + /opt/intel/vtune_amplifier_xe/include + "$ENV{${PROGRAMFILES_X86}}/Intel/VTune\ Amplifier\ XE/include" ) +if (CMAKE_SIZEOF_VOID_P EQUAL 4) + #message( STATUS "Searching for 32-bit ittnotify lib..." ) + find_library( VTUNE_ITTNOTIFY_LIB NAMES ittnotify libittnotify + HINTS + /opt/intel/vtune_amplifier_xe/lib32 + "$ENV{${PROGRAMFILES_X86}}/Intel/VTune\ Amplifier\ XE/lib32" ) +else() + #message( STATUS "Searching for 64-bit ittnotify lib..." 
) + find_library( VTUNE_ITTNOTIFY_LIB NAMES ittnotify libittnotify + HINTS + /opt/intel/vtune_amplifier_xe/lib64 + "$ENV{${PROGRAMFILES_X86}}/Intel/VTune\ Amplifier\ XE/lib64" ) +endif() +if( ENABLE_ITT ) + add_definitions("-DUSE_ITT") + include_directories( ${VTUNE_INCLUDE_DIR} ) + message( STATUS "VTune ITTNotify Lib is: ${VTUNE_ITTNOTIFY_LIB}" ) + target_link_libraries( OpenCL ${VTUNE_ITTNOTIFY_LIB} ${CMAKE_THREAD_LIBS_INIT} ) +endif() + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + if(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(CLINTERCEPT_PLATFORM_NAME "x64") + elseif (CMAKE_SIZEOF_VOID_P EQUAL 4) + set(CLINTERCEPT_PLATFORM_NAME "Win32") + else() + set(CLINTERCEPT_PLATFORM_NAME "Unknown") + endif() + + if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/Builds/${CLINTERCEPT_PLATFORM_NAME}" CACHE PATH "Install Path" FORCE) + endif() + + foreach( OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES} ) + string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG_UPPER ) + + # For Windows debug builds, we want to statically link the C runtime. + # If we do not statically link the C runtime then the target machine + # must install Visual Studio, which is not desirable. + if( ${OUTPUTCONFIG_UPPER} MATCHES "DEBUG" ) + #message( STATUS "DEBUG build detected!" 
) + #message( STATUS " 'CMAKE_CXX_FLAGS_${OUTPUTCONFIG_UPPER}': ${CMAKE_CXX_FLAGS_${OUTPUTCONFIG_UPPER}}" ) + #message( STATUS "->" ) + if( CMAKE_CXX_FLAGS_${OUTPUTCONFIG_UPPER} MATCHES "/MD" ) + string( REGEX REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_${OUTPUTCONFIG_UPPER} "${CMAKE_CXX_FLAGS_${OUTPUTCONFIG_UPPER}}" ) + endif() + #message( STATUS " 'CMAKE_CXX_FLAGS_${OUTPUTCONFIG_UPPER}': ${CMAKE_CXX_FLAGS_${OUTPUTCONFIG_UPPER}}" ) + endif() + install(TARGETS OpenCL DESTINATION ${OUTPUTCONFIG} CONFIGURATIONS ${OUTPUTCONFIG}) + endforeach() +else() + if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/install" CACHE PATH "Install Path" FORCE) + endif() + install(TARGETS OpenCL DESTINATION "lib") +endif() diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..8aec231a --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of experience, +nationality, personal appearance, race, religion, or sexual identity and +orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team via email at ben 'dot' ashbaugh 'at' +intel 'dot' com or any other Intel GitHub maintainer (see profile for email +address). All complaints will be reviewed and investigated and will result in +a response that is deemed necessary and appropriate to the circumstances. The +project team is obligated to maintain confidentiality with regard to the +reporter of an incident. Further details of specific enforcement policies may +be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct/ + +[homepage]: https://www.contributor-covenant.org + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..237d4f1c --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,32 @@ +# Contributing to the Intercept Layer for OpenCL Applications + +Thanks for your time! Contributions to the Intercept Layer for OpenCL +Applications are welcomed and encouraged. Below you can find guidelines +for contributing to the Intercept Layer for OpenCL Applications. + +## How to Report an Issue or Feature Request + +Public GitHub issues are the preferred method for reporting issues and feature +requests. Private or sensitive issues may be submitted via email to +this project's maintainer (Ben Ashbaugh - ben 'dot' ashbaugh 'at' intel 'dot' +com), or to any other Intel GitHub maintainer (see profile for email address). + +## How to Fix an Issue or Add a Feature + +If you have an idea how to improve the Intercept Layer for OpenCL Applications: + +1. Please share your proposal via a GitHub issue. 
This lets others know what + you're working on and gives others an opportunity to provide early feedback. +1. Implement, validate, and document your fix or feature. Be sure it doesn't + break any existing functionality! +1. Submit a pull request with your changes. + +After submitting a pull request your contribution will be reviewed. Your pull +request may be accepted as-is, or additional fixes or modifications may be +required. + +--- + +\* Other names and brands may be claimed as the property of others. + +Copyright (c) 2018, Intel(R) Corporation diff --git a/GL/glcorearb.h b/GL/glcorearb.h new file mode 100644 index 00000000..71e3b069 --- /dev/null +++ b/GL/glcorearb.h @@ -0,0 +1,3597 @@ +#ifndef __glcorearb_h_ +#define __glcorearb_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2013-2014 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+*/ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision$ on $Date$ +*/ + +#if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include <windows.h> +#endif + +#ifndef APIENTRY +#define APIENTRY +#endif +#ifndef APIENTRYP +#define APIENTRYP APIENTRY * +#endif +#ifndef GLAPI +#define GLAPI extern +#endif + +/* glcorearb.h is for use with OpenGL core profile implementations. +** It should be placed in the same directory as gl.h and +** included as <GL/glcorearb.h>. +** +** glcorearb.h includes only APIs in the latest OpenGL core profile +** implementation together with APIs in newer ARB extensions which +** can be supported by the core profile. It does not, and never will +** include functionality removed from the core profile, such as +** fixed-function vertex and fragment processing. +** +** Do not #include both <GL/glcorearb.h> and either of <GL/gl.h> or +** <GL/glext.h> in the same source file. 
+*/ + +/* Generated C header for: + * API: gl + * Profile: core + * Versions considered: .* + * Versions emitted: .* + * Default extensions included: glcore + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef GL_VERSION_1_0 +#define GL_VERSION_1_0 1 +typedef void GLvoid; +typedef unsigned int GLenum; +typedef float GLfloat; +typedef int GLint; +typedef int GLsizei; +typedef unsigned int GLbitfield; +typedef double GLdouble; +typedef unsigned int GLuint; +typedef unsigned char GLboolean; +typedef unsigned char GLubyte; +typedef void (APIENTRYP PFNGLCULLFACEPROC) (GLenum mode); +typedef void (APIENTRYP PFNGLFRONTFACEPROC) (GLenum mode); +typedef void (APIENTRYP PFNGLHINTPROC) (GLenum target, GLenum mode); +typedef void (APIENTRYP PFNGLLINEWIDTHPROC) (GLfloat width); +typedef void (APIENTRYP PFNGLPOINTSIZEPROC) (GLfloat size); +typedef void (APIENTRYP PFNGLPOLYGONMODEPROC) (GLenum face, GLenum mode); +typedef void (APIENTRYP PFNGLSCISSORPROC) (GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLTEXPARAMETERFPROC) (GLenum target, GLenum pname, GLfloat param); +typedef void (APIENTRYP PFNGLTEXPARAMETERFVPROC) (GLenum target, GLenum pname, const GLfloat *params); +typedef void (APIENTRYP PFNGLTEXPARAMETERIPROC) (GLenum target, GLenum pname, GLint param); +typedef void (APIENTRYP PFNGLTEXPARAMETERIVPROC) (GLenum target, GLenum pname, const GLint *params); +typedef void (APIENTRYP PFNGLTEXIMAGE1DPROC) (GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const void *pixels); +typedef void (APIENTRYP PFNGLTEXIMAGE2DPROC) (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void *pixels); +typedef void (APIENTRYP PFNGLDRAWBUFFERPROC) (GLenum buf); +typedef void (APIENTRYP PFNGLCLEARPROC) (GLbitfield mask); +typedef void (APIENTRYP PFNGLCLEARCOLORPROC) (GLfloat red, GLfloat 
green, GLfloat blue, GLfloat alpha); +typedef void (APIENTRYP PFNGLCLEARSTENCILPROC) (GLint s); +typedef void (APIENTRYP PFNGLCLEARDEPTHPROC) (GLdouble depth); +typedef void (APIENTRYP PFNGLSTENCILMASKPROC) (GLuint mask); +typedef void (APIENTRYP PFNGLCOLORMASKPROC) (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +typedef void (APIENTRYP PFNGLDEPTHMASKPROC) (GLboolean flag); +typedef void (APIENTRYP PFNGLDISABLEPROC) (GLenum cap); +typedef void (APIENTRYP PFNGLENABLEPROC) (GLenum cap); +typedef void (APIENTRYP PFNGLFINISHPROC) (void); +typedef void (APIENTRYP PFNGLFLUSHPROC) (void); +typedef void (APIENTRYP PFNGLBLENDFUNCPROC) (GLenum sfactor, GLenum dfactor); +typedef void (APIENTRYP PFNGLLOGICOPPROC) (GLenum opcode); +typedef void (APIENTRYP PFNGLSTENCILFUNCPROC) (GLenum func, GLint ref, GLuint mask); +typedef void (APIENTRYP PFNGLSTENCILOPPROC) (GLenum fail, GLenum zfail, GLenum zpass); +typedef void (APIENTRYP PFNGLDEPTHFUNCPROC) (GLenum func); +typedef void (APIENTRYP PFNGLPIXELSTOREFPROC) (GLenum pname, GLfloat param); +typedef void (APIENTRYP PFNGLPIXELSTOREIPROC) (GLenum pname, GLint param); +typedef void (APIENTRYP PFNGLREADBUFFERPROC) (GLenum src); +typedef void (APIENTRYP PFNGLREADPIXELSPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void *pixels); +typedef void (APIENTRYP PFNGLGETBOOLEANVPROC) (GLenum pname, GLboolean *data); +typedef void (APIENTRYP PFNGLGETDOUBLEVPROC) (GLenum pname, GLdouble *data); +typedef GLenum (APIENTRYP PFNGLGETERRORPROC) (void); +typedef void (APIENTRYP PFNGLGETFLOATVPROC) (GLenum pname, GLfloat *data); +typedef void (APIENTRYP PFNGLGETINTEGERVPROC) (GLenum pname, GLint *data); +typedef const GLubyte *(APIENTRYP PFNGLGETSTRINGPROC) (GLenum name); +typedef void (APIENTRYP PFNGLGETTEXIMAGEPROC) (GLenum target, GLint level, GLenum format, GLenum type, void *pixels); +typedef void (APIENTRYP PFNGLGETTEXPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params); +typedef 
void (APIENTRYP PFNGLGETTEXPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERFVPROC) (GLenum target, GLint level, GLenum pname, GLfloat *params); +typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERIVPROC) (GLenum target, GLint level, GLenum pname, GLint *params); +typedef GLboolean (APIENTRYP PFNGLISENABLEDPROC) (GLenum cap); +typedef void (APIENTRYP PFNGLDEPTHRANGEPROC) (GLdouble near, GLdouble far); +typedef void (APIENTRYP PFNGLVIEWPORTPROC) (GLint x, GLint y, GLsizei width, GLsizei height); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glCullFace (GLenum mode); +GLAPI void APIENTRY glFrontFace (GLenum mode); +GLAPI void APIENTRY glHint (GLenum target, GLenum mode); +GLAPI void APIENTRY glLineWidth (GLfloat width); +GLAPI void APIENTRY glPointSize (GLfloat size); +GLAPI void APIENTRY glPolygonMode (GLenum face, GLenum mode); +GLAPI void APIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI void APIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GLAPI void APIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat *params); +GLAPI void APIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GLAPI void APIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint *params); +GLAPI void APIENTRY glTexImage1D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const void *pixels); +GLAPI void APIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void *pixels); +GLAPI void APIENTRY glDrawBuffer (GLenum buf); +GLAPI void APIENTRY glClear (GLbitfield mask); +GLAPI void APIENTRY glClearColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GLAPI void APIENTRY glClearStencil (GLint s); +GLAPI void APIENTRY glClearDepth (GLdouble depth); +GLAPI void APIENTRY glStencilMask 
(GLuint mask); +GLAPI void APIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GLAPI void APIENTRY glDepthMask (GLboolean flag); +GLAPI void APIENTRY glDisable (GLenum cap); +GLAPI void APIENTRY glEnable (GLenum cap); +GLAPI void APIENTRY glFinish (void); +GLAPI void APIENTRY glFlush (void); +GLAPI void APIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GLAPI void APIENTRY glLogicOp (GLenum opcode); +GLAPI void APIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GLAPI void APIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GLAPI void APIENTRY glDepthFunc (GLenum func); +GLAPI void APIENTRY glPixelStoref (GLenum pname, GLfloat param); +GLAPI void APIENTRY glPixelStorei (GLenum pname, GLint param); +GLAPI void APIENTRY glReadBuffer (GLenum src); +GLAPI void APIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void *pixels); +GLAPI void APIENTRY glGetBooleanv (GLenum pname, GLboolean *data); +GLAPI void APIENTRY glGetDoublev (GLenum pname, GLdouble *data); +GLAPI GLenum APIENTRY glGetError (void); +GLAPI void APIENTRY glGetFloatv (GLenum pname, GLfloat *data); +GLAPI void APIENTRY glGetIntegerv (GLenum pname, GLint *data); +GLAPI const GLubyte *APIENTRY glGetString (GLenum name); +GLAPI void APIENTRY glGetTexImage (GLenum target, GLint level, GLenum format, GLenum type, void *pixels); +GLAPI void APIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat *params); +GLAPI void APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetTexLevelParameterfv (GLenum target, GLint level, GLenum pname, GLfloat *params); +GLAPI void APIENTRY glGetTexLevelParameteriv (GLenum target, GLint level, GLenum pname, GLint *params); +GLAPI GLboolean APIENTRY glIsEnabled (GLenum cap); +GLAPI void APIENTRY glDepthRange (GLdouble near, GLdouble far); +GLAPI void APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei 
height); +#endif +#endif /* GL_VERSION_1_0 */ + +#ifndef GL_VERSION_1_1 +#define GL_VERSION_1_1 1 +typedef float GLclampf; +typedef double GLclampd; +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_FALSE 0 +#define GL_TRUE 1 +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 +#define GL_QUADS 0x0007 +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_NONE 0 +#define GL_FRONT_LEFT 0x0400 +#define GL_FRONT_RIGHT 0x0401 +#define GL_BACK_LEFT 0x0402 +#define GL_BACK_RIGHT 0x0403 +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_LEFT 0x0406 +#define GL_RIGHT 0x0407 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_CW 0x0900 +#define GL_CCW 0x0901 +#define GL_POINT_SIZE 0x0B11 +#define GL_POINT_SIZE_RANGE 0x0B12 +#define GL_POINT_SIZE_GRANULARITY 0x0B13 +#define GL_LINE_SMOOTH 0x0B20 +#define GL_LINE_WIDTH 0x0B21 +#define GL_LINE_WIDTH_RANGE 0x0B22 +#define GL_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_POLYGON_MODE 0x0B40 +#define GL_POLYGON_SMOOTH 0x0B41 +#define GL_CULL_FACE 0x0B44 +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_DEPTH_RANGE 
0x0B70 +#define GL_DEPTH_TEST 0x0B71 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_TEST 0x0B90 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_VIEWPORT 0x0BA2 +#define GL_DITHER 0x0BD0 +#define GL_BLEND_DST 0x0BE0 +#define GL_BLEND_SRC 0x0BE1 +#define GL_BLEND 0x0BE2 +#define GL_LOGIC_OP_MODE 0x0BF0 +#define GL_COLOR_LOGIC_OP 0x0BF2 +#define GL_DRAW_BUFFER 0x0C01 +#define GL_READ_BUFFER 0x0C02 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_DOUBLEBUFFER 0x0C32 +#define GL_STEREO 0x0C33 +#define GL_LINE_SMOOTH_HINT 0x0C52 +#define GL_POLYGON_SMOOTH_HINT 0x0C53 +#define GL_UNPACK_SWAP_BYTES 0x0CF0 +#define GL_UNPACK_LSB_FIRST 0x0CF1 +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_SWAP_BYTES 0x0D00 +#define GL_PACK_LSB_FIRST 0x0D01 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_TEXTURE_1D 0x0DE0 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_OFFSET_POINT 0x2A01 +#define GL_POLYGON_OFFSET_LINE 0x2A02 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_1D 0x8068 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_TEXTURE_WIDTH 0x1000 +#define GL_TEXTURE_HEIGHT 0x1001 +#define GL_TEXTURE_INTERNAL_FORMAT 0x1003 +#define 
GL_TEXTURE_BORDER_COLOR 0x1004 +#define GL_TEXTURE_RED_SIZE 0x805C +#define GL_TEXTURE_GREEN_SIZE 0x805D +#define GL_TEXTURE_BLUE_SIZE 0x805E +#define GL_TEXTURE_ALPHA_SIZE 0x805F +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_DOUBLE 0x140A +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_CLEAR 0x1500 +#define GL_AND 0x1501 +#define GL_AND_REVERSE 0x1502 +#define GL_COPY 0x1503 +#define GL_AND_INVERTED 0x1504 +#define GL_NOOP 0x1505 +#define GL_XOR 0x1506 +#define GL_OR 0x1507 +#define GL_NOR 0x1508 +#define GL_EQUIV 0x1509 +#define GL_INVERT 0x150A +#define GL_OR_REVERSE 0x150B +#define GL_COPY_INVERTED 0x150C +#define GL_OR_INVERTED 0x150D +#define GL_NAND 0x150E +#define GL_SET 0x150F +#define GL_TEXTURE 0x1702 +#define GL_COLOR 0x1800 +#define GL_DEPTH 0x1801 +#define GL_STENCIL 0x1802 +#define GL_STENCIL_INDEX 0x1901 +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_RED 0x1903 +#define GL_GREEN 0x1904 +#define GL_BLUE 0x1905 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_POINT 0x1B00 +#define GL_LINE 0x1B01 +#define GL_FILL 0x1B02 +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_PROXY_TEXTURE_1D 0x8063 +#define GL_PROXY_TEXTURE_2D 
0x8064 +#define GL_REPEAT 0x2901 +#define GL_R3_G3_B2 0x2A10 +#define GL_RGB4 0x804F +#define GL_RGB5 0x8050 +#define GL_RGB8 0x8051 +#define GL_RGB10 0x8052 +#define GL_RGB12 0x8053 +#define GL_RGB16 0x8054 +#define GL_RGBA2 0x8055 +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGBA8 0x8058 +#define GL_RGB10_A2 0x8059 +#define GL_RGBA12 0x805A +#define GL_RGBA16 0x805B +#define GL_VERTEX_ARRAY 0x8074 +typedef void (APIENTRYP PFNGLDRAWARRAYSPROC) (GLenum mode, GLint first, GLsizei count); +typedef void (APIENTRYP PFNGLDRAWELEMENTSPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices); +typedef void (APIENTRYP PFNGLGETPOINTERVPROC) (GLenum pname, void **params); +typedef void (APIENTRYP PFNGLPOLYGONOFFSETPROC) (GLfloat factor, GLfloat units); +typedef void (APIENTRYP PFNGLCOPYTEXIMAGE1DPROC) (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border); +typedef void (APIENTRYP PFNGLCOPYTEXIMAGE2DPROC) (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE1DPROC) (GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE2DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLTEXSUBIMAGE1DPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void *pixels); +typedef void (APIENTRYP PFNGLTEXSUBIMAGE2DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels); +typedef void (APIENTRYP PFNGLBINDTEXTUREPROC) (GLenum target, GLuint texture); +typedef void (APIENTRYP PFNGLDELETETEXTURESPROC) (GLsizei n, const GLuint *textures); +typedef void (APIENTRYP PFNGLGENTEXTURESPROC) (GLsizei n, GLuint *textures); +typedef 
GLboolean (APIENTRYP PFNGLISTEXTUREPROC) (GLuint texture); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GLAPI void APIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const void *indices); +GLAPI void APIENTRY glGetPointerv (GLenum pname, void **params); +GLAPI void APIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GLAPI void APIENTRY glCopyTexImage1D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border); +GLAPI void APIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GLAPI void APIENTRY glCopyTexSubImage1D (GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +GLAPI void APIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI void APIENTRY glTexSubImage1D (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void *pixels); +GLAPI void APIENTRY glTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels); +GLAPI void APIENTRY glBindTexture (GLenum target, GLuint texture); +GLAPI void APIENTRY glDeleteTextures (GLsizei n, const GLuint *textures); +GLAPI void APIENTRY glGenTextures (GLsizei n, GLuint *textures); +GLAPI GLboolean APIENTRY glIsTexture (GLuint texture); +#endif +#endif /* GL_VERSION_1_1 */ + +#ifndef GL_VERSION_1_2 +#define GL_VERSION_1_2 1 +#define GL_UNSIGNED_BYTE_3_3_2 0x8032 +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_INT_8_8_8_8 0x8035 +#define GL_UNSIGNED_INT_10_10_10_2 0x8036 +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_PACK_SKIP_IMAGES 0x806B +#define GL_PACK_IMAGE_HEIGHT 0x806C +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define 
GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_TEXTURE_3D 0x806F +#define GL_PROXY_TEXTURE_3D 0x8070 +#define GL_TEXTURE_DEPTH 0x8071 +#define GL_TEXTURE_WRAP_R 0x8072 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365 +#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366 +#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367 +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_BGR 0x80E0 +#define GL_BGRA 0x80E1 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12 +#define GL_SMOOTH_POINT_SIZE_GRANULARITY 0x0B13 +#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22 +#define GL_SMOOTH_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTSPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices); +typedef void (APIENTRYP PFNGLTEXIMAGE3DPROC) (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void *pixels); +typedef void (APIENTRYP PFNGLTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels); +typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glDrawRangeElements (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices); +GLAPI void APIENTRY glTexImage3D (GLenum target, 
GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void *pixels); +GLAPI void APIENTRY glTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels); +GLAPI void APIENTRY glCopyTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +#endif +#endif /* GL_VERSION_1_2 */ + +#ifndef GL_VERSION_1_3 +#define GL_VERSION_1_3 1 +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_MULTISAMPLE 0x809D +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE 0x809F +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define 
GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_PROXY_TEXTURE_CUBE_MAP 0x851B +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_COMPRESSED_RGB 0x84ED +#define GL_COMPRESSED_RGBA 0x84EE +#define GL_TEXTURE_COMPRESSION_HINT 0x84EF +#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE 0x86A0 +#define GL_TEXTURE_COMPRESSED 0x86A1 +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_CLAMP_TO_BORDER 0x812D +typedef void (APIENTRYP PFNGLACTIVETEXTUREPROC) (GLenum texture); +typedef void (APIENTRYP PFNGLSAMPLECOVERAGEPROC) (GLfloat value, GLboolean invert); +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE3DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void *data); +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE2DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data); +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE1DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const void *data); +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data); +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data); +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void *data); +typedef void (APIENTRYP 
PFNGLGETCOMPRESSEDTEXIMAGEPROC) (GLenum target, GLint level, void *img); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glActiveTexture (GLenum texture); +GLAPI void APIENTRY glSampleCoverage (GLfloat value, GLboolean invert); +GLAPI void APIENTRY glCompressedTexImage3D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void *data); +GLAPI void APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data); +GLAPI void APIENTRY glCompressedTexImage1D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const void *data); +GLAPI void APIENTRY glCompressedTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data); +GLAPI void APIENTRY glCompressedTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data); +GLAPI void APIENTRY glCompressedTexSubImage1D (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void *data); +GLAPI void APIENTRY glGetCompressedTexImage (GLenum target, GLint level, void *img); +#endif +#endif /* GL_VERSION_1_3 */ + +#ifndef GL_VERSION_1_4 +#define GL_VERSION_1_4 1 +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_DEPTH_COMPONENT32 0x81A7 +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_TEXTURE_LOD_BIAS 0x8501 +#define GL_INCR_WRAP 0x8507 +#define GL_DECR_WRAP 0x8508 +#define 
GL_TEXTURE_DEPTH_SIZE 0x884A +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_FUNC_ADD 0x8006 +#define GL_FUNC_SUBTRACT 0x800A +#define GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_MIN 0x8007 +#define GL_MAX 0x8008 +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEPROC) (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSPROC) (GLenum mode, const GLint *first, const GLsizei *count, GLsizei drawcount); +typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSPROC) (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount); +typedef void (APIENTRYP PFNGLPOINTPARAMETERFPROC) (GLenum pname, GLfloat param); +typedef void (APIENTRYP PFNGLPOINTPARAMETERFVPROC) (GLenum pname, const GLfloat *params); +typedef void (APIENTRYP PFNGLPOINTPARAMETERIPROC) (GLenum pname, GLint param); +typedef void (APIENTRYP PFNGLPOINTPARAMETERIVPROC) (GLenum pname, const GLint *params); +typedef void (APIENTRYP PFNGLBLENDCOLORPROC) (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +typedef void (APIENTRYP PFNGLBLENDEQUATIONPROC) (GLenum mode); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glBlendFuncSeparate (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +GLAPI void APIENTRY glMultiDrawArrays (GLenum mode, const GLint *first, const GLsizei *count, GLsizei drawcount); +GLAPI void APIENTRY glMultiDrawElements (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount); +GLAPI void APIENTRY glPointParameterf (GLenum pname, GLfloat param); +GLAPI void APIENTRY glPointParameterfv (GLenum pname, const GLfloat *params); +GLAPI void APIENTRY glPointParameteri (GLenum pname, GLint param); +GLAPI void APIENTRY glPointParameteriv 
(GLenum pname, const GLint *params); +GLAPI void APIENTRY glBlendColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GLAPI void APIENTRY glBlendEquation (GLenum mode); +#endif +#endif /* GL_VERSION_1_4 */ + +#ifndef GL_VERSION_1_5 +#define GL_VERSION_1_5 1 +#include <stddef.h> +typedef ptrdiff_t GLsizeiptr; +typedef ptrdiff_t GLintptr; +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_QUERY_COUNTER_BITS 0x8864 +#define GL_CURRENT_QUERY 0x8865 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_READ_ONLY 0x88B8 +#define GL_WRITE_ONLY 0x88B9 +#define GL_READ_WRITE 0x88BA +#define GL_BUFFER_ACCESS 0x88BB +#define GL_BUFFER_MAPPED 0x88BC +#define GL_BUFFER_MAP_POINTER 0x88BD +#define GL_STREAM_DRAW 0x88E0 +#define GL_STREAM_READ 0x88E1 +#define GL_STREAM_COPY 0x88E2 +#define GL_STATIC_DRAW 0x88E4 +#define GL_STATIC_READ 0x88E5 +#define GL_STATIC_COPY 0x88E6 +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_SAMPLES_PASSED 0x8914 +#define GL_SRC1_ALPHA 0x8589 +typedef void (APIENTRYP PFNGLGENQUERIESPROC) (GLsizei n, GLuint *ids); +typedef void (APIENTRYP PFNGLDELETEQUERIESPROC) (GLsizei n, const GLuint *ids); +typedef GLboolean (APIENTRYP PFNGLISQUERYPROC) (GLuint id); +typedef void (APIENTRYP PFNGLBEGINQUERYPROC) (GLenum target, GLuint id); +typedef void (APIENTRYP PFNGLENDQUERYPROC) (GLenum target); +typedef void (APIENTRYP PFNGLGETQUERYIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETQUERYOBJECTIVPROC) (GLuint id, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETQUERYOBJECTUIVPROC) (GLuint id, GLenum pname, GLuint *params); +typedef void (APIENTRYP PFNGLBINDBUFFERPROC) (GLenum target,
GLuint buffer); +typedef void (APIENTRYP PFNGLDELETEBUFFERSPROC) (GLsizei n, const GLuint *buffers); +typedef void (APIENTRYP PFNGLGENBUFFERSPROC) (GLsizei n, GLuint *buffers); +typedef GLboolean (APIENTRYP PFNGLISBUFFERPROC) (GLuint buffer); +typedef void (APIENTRYP PFNGLBUFFERDATAPROC) (GLenum target, GLsizeiptr size, const void *data, GLenum usage); +typedef void (APIENTRYP PFNGLBUFFERSUBDATAPROC) (GLenum target, GLintptr offset, GLsizeiptr size, const void *data); +typedef void (APIENTRYP PFNGLGETBUFFERSUBDATAPROC) (GLenum target, GLintptr offset, GLsizeiptr size, void *data); +typedef void *(APIENTRYP PFNGLMAPBUFFERPROC) (GLenum target, GLenum access); +typedef GLboolean (APIENTRYP PFNGLUNMAPBUFFERPROC) (GLenum target); +typedef void (APIENTRYP PFNGLGETBUFFERPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETBUFFERPOINTERVPROC) (GLenum target, GLenum pname, void **params); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glGenQueries (GLsizei n, GLuint *ids); +GLAPI void APIENTRY glDeleteQueries (GLsizei n, const GLuint *ids); +GLAPI GLboolean APIENTRY glIsQuery (GLuint id); +GLAPI void APIENTRY glBeginQuery (GLenum target, GLuint id); +GLAPI void APIENTRY glEndQuery (GLenum target); +GLAPI void APIENTRY glGetQueryiv (GLenum target, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetQueryObjectiv (GLuint id, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetQueryObjectuiv (GLuint id, GLenum pname, GLuint *params); +GLAPI void APIENTRY glBindBuffer (GLenum target, GLuint buffer); +GLAPI void APIENTRY glDeleteBuffers (GLsizei n, const GLuint *buffers); +GLAPI void APIENTRY glGenBuffers (GLsizei n, GLuint *buffers); +GLAPI GLboolean APIENTRY glIsBuffer (GLuint buffer); +GLAPI void APIENTRY glBufferData (GLenum target, GLsizeiptr size, const void *data, GLenum usage); +GLAPI void APIENTRY glBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, const void *data); +GLAPI void APIENTRY 
glGetBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, void *data); +GLAPI void *APIENTRY glMapBuffer (GLenum target, GLenum access); +GLAPI GLboolean APIENTRY glUnmapBuffer (GLenum target); +GLAPI void APIENTRY glGetBufferParameteriv (GLenum target, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetBufferPointerv (GLenum target, GLenum pname, void **params); +#endif +#endif /* GL_VERSION_1_5 */ + +#ifndef GL_VERSION_2_0 +#define GL_VERSION_2_0 1 +typedef char GLchar; +typedef short GLshort; +typedef signed char GLbyte; +typedef unsigned short GLushort; +#define GL_BLEND_EQUATION_RGB 0x8009 +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_VERTEX_PROGRAM_POINT_SIZE 0x8642 +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A 
+#define GL_MAX_VARYING_FLOATS 0x8B4B +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_SHADER_TYPE 0x8B4F +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_DELETE_STATUS 0x8B80 +#define GL_COMPILE_STATUS 0x8B81 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_POINT_SPRITE_COORD_ORIGIN 0x8CA0 +#define GL_LOWER_LEFT 0x8CA1 +#define GL_UPPER_LEFT 0x8CA2 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +typedef void (APIENTRYP PFNGLBLENDEQUATIONSEPARATEPROC) (GLenum modeRGB, GLenum modeAlpha); +typedef void (APIENTRYP PFNGLDRAWBUFFERSPROC) (GLsizei n, const GLenum *bufs); +typedef void (APIENTRYP PFNGLSTENCILOPSEPARATEPROC) (GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +typedef void (APIENTRYP PFNGLSTENCILFUNCSEPARATEPROC) (GLenum face, GLenum func, GLint ref, GLuint mask); +typedef void (APIENTRYP PFNGLSTENCILMASKSEPARATEPROC) (GLenum face, GLuint mask); +typedef void 
(APIENTRYP PFNGLATTACHSHADERPROC) (GLuint program, GLuint shader); +typedef void (APIENTRYP PFNGLBINDATTRIBLOCATIONPROC) (GLuint program, GLuint index, const GLchar *name); +typedef void (APIENTRYP PFNGLCOMPILESHADERPROC) (GLuint shader); +typedef GLuint (APIENTRYP PFNGLCREATEPROGRAMPROC) (void); +typedef GLuint (APIENTRYP PFNGLCREATESHADERPROC) (GLenum type); +typedef void (APIENTRYP PFNGLDELETEPROGRAMPROC) (GLuint program); +typedef void (APIENTRYP PFNGLDELETESHADERPROC) (GLuint shader); +typedef void (APIENTRYP PFNGLDETACHSHADERPROC) (GLuint program, GLuint shader); +typedef void (APIENTRYP PFNGLDISABLEVERTEXATTRIBARRAYPROC) (GLuint index); +typedef void (APIENTRYP PFNGLENABLEVERTEXATTRIBARRAYPROC) (GLuint index); +typedef void (APIENTRYP PFNGLGETACTIVEATTRIBPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +typedef void (APIENTRYP PFNGLGETATTACHEDSHADERSPROC) (GLuint program, GLsizei maxCount, GLsizei *count, GLuint *shaders); +typedef GLint (APIENTRYP PFNGLGETATTRIBLOCATIONPROC) (GLuint program, const GLchar *name); +typedef void (APIENTRYP PFNGLGETPROGRAMIVPROC) (GLuint program, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETPROGRAMINFOLOGPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +typedef void (APIENTRYP PFNGLGETSHADERIVPROC) (GLuint shader, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETSHADERINFOLOGPROC) (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +typedef void (APIENTRYP PFNGLGETSHADERSOURCEPROC) (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *source); +typedef GLint (APIENTRYP PFNGLGETUNIFORMLOCATIONPROC) (GLuint program, const GLchar *name); +typedef void (APIENTRYP PFNGLGETUNIFORMFVPROC) (GLuint program, GLint location, GLfloat *params); 
+typedef void (APIENTRYP PFNGLGETUNIFORMIVPROC) (GLuint program, GLint location, GLint *params); +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBDVPROC) (GLuint index, GLenum pname, GLdouble *params); +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBFVPROC) (GLuint index, GLenum pname, GLfloat *params); +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIVPROC) (GLuint index, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBPOINTERVPROC) (GLuint index, GLenum pname, void **pointer); +typedef GLboolean (APIENTRYP PFNGLISPROGRAMPROC) (GLuint program); +typedef GLboolean (APIENTRYP PFNGLISSHADERPROC) (GLuint shader); +typedef void (APIENTRYP PFNGLLINKPROGRAMPROC) (GLuint program); +typedef void (APIENTRYP PFNGLSHADERSOURCEPROC) (GLuint shader, GLsizei count, const GLchar *const*string, const GLint *length); +typedef void (APIENTRYP PFNGLUSEPROGRAMPROC) (GLuint program); +typedef void (APIENTRYP PFNGLUNIFORM1FPROC) (GLint location, GLfloat v0); +typedef void (APIENTRYP PFNGLUNIFORM2FPROC) (GLint location, GLfloat v0, GLfloat v1); +typedef void (APIENTRYP PFNGLUNIFORM3FPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +typedef void (APIENTRYP PFNGLUNIFORM4FPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +typedef void (APIENTRYP PFNGLUNIFORM1IPROC) (GLint location, GLint v0); +typedef void (APIENTRYP PFNGLUNIFORM2IPROC) (GLint location, GLint v0, GLint v1); +typedef void (APIENTRYP PFNGLUNIFORM3IPROC) (GLint location, GLint v0, GLint v1, GLint v2); +typedef void (APIENTRYP PFNGLUNIFORM4IPROC) (GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +typedef void (APIENTRYP PFNGLUNIFORM1FVPROC) (GLint location, GLsizei count, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORM2FVPROC) (GLint location, GLsizei count, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORM3FVPROC) (GLint location, GLsizei count, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORM4FVPROC) (GLint location, GLsizei count, 
const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORM1IVPROC) (GLint location, GLsizei count, const GLint *value); +typedef void (APIENTRYP PFNGLUNIFORM2IVPROC) (GLint location, GLsizei count, const GLint *value); +typedef void (APIENTRYP PFNGLUNIFORM3IVPROC) (GLint location, GLsizei count, const GLint *value); +typedef void (APIENTRYP PFNGLUNIFORM4IVPROC) (GLint location, GLsizei count, const GLint *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX2FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLVALIDATEPROGRAMPROC) (GLuint program); +typedef void (APIENTRYP PFNGLVERTEXATTRIB1DPROC) (GLuint index, GLdouble x); +typedef void (APIENTRYP PFNGLVERTEXATTRIB1DVPROC) (GLuint index, const GLdouble *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB1FPROC) (GLuint index, GLfloat x); +typedef void (APIENTRYP PFNGLVERTEXATTRIB1FVPROC) (GLuint index, const GLfloat *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB1SPROC) (GLuint index, GLshort x); +typedef void (APIENTRYP PFNGLVERTEXATTRIB1SVPROC) (GLuint index, const GLshort *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB2DPROC) (GLuint index, GLdouble x, GLdouble y); +typedef void (APIENTRYP PFNGLVERTEXATTRIB2DVPROC) (GLuint index, const GLdouble *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB2FPROC) (GLuint index, GLfloat x, GLfloat y); +typedef void (APIENTRYP PFNGLVERTEXATTRIB2FVPROC) (GLuint index, const GLfloat *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB2SPROC) (GLuint index, GLshort x, GLshort y); +typedef void (APIENTRYP PFNGLVERTEXATTRIB2SVPROC) (GLuint index, const GLshort *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB3DPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z); +typedef void 
(APIENTRYP PFNGLVERTEXATTRIB3DVPROC) (GLuint index, const GLdouble *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB3FPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z); +typedef void (APIENTRYP PFNGLVERTEXATTRIB3FVPROC) (GLuint index, const GLfloat *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB3SPROC) (GLuint index, GLshort x, GLshort y, GLshort z); +typedef void (APIENTRYP PFNGLVERTEXATTRIB3SVPROC) (GLuint index, const GLshort *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NBVPROC) (GLuint index, const GLbyte *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NIVPROC) (GLuint index, const GLint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NSVPROC) (GLuint index, const GLshort *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUBPROC) (GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUBVPROC) (GLuint index, const GLubyte *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUIVPROC) (GLuint index, const GLuint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUSVPROC) (GLuint index, const GLushort *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4BVPROC) (GLuint index, const GLbyte *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4DPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4DVPROC) (GLuint index, const GLdouble *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4FPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4FVPROC) (GLuint index, const GLfloat *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4IVPROC) (GLuint index, const GLint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4SPROC) (GLuint index, GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4SVPROC) (GLuint index, const GLshort *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4UBVPROC) (GLuint index, const GLubyte *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIB4UIVPROC) (GLuint index, const GLuint *v); 
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4USVPROC) (GLuint index, const GLushort *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBPOINTERPROC) (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void *pointer); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glBlendEquationSeparate (GLenum modeRGB, GLenum modeAlpha); +GLAPI void APIENTRY glDrawBuffers (GLsizei n, const GLenum *bufs); +GLAPI void APIENTRY glStencilOpSeparate (GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +GLAPI void APIENTRY glStencilFuncSeparate (GLenum face, GLenum func, GLint ref, GLuint mask); +GLAPI void APIENTRY glStencilMaskSeparate (GLenum face, GLuint mask); +GLAPI void APIENTRY glAttachShader (GLuint program, GLuint shader); +GLAPI void APIENTRY glBindAttribLocation (GLuint program, GLuint index, const GLchar *name); +GLAPI void APIENTRY glCompileShader (GLuint shader); +GLAPI GLuint APIENTRY glCreateProgram (void); +GLAPI GLuint APIENTRY glCreateShader (GLenum type); +GLAPI void APIENTRY glDeleteProgram (GLuint program); +GLAPI void APIENTRY glDeleteShader (GLuint shader); +GLAPI void APIENTRY glDetachShader (GLuint program, GLuint shader); +GLAPI void APIENTRY glDisableVertexAttribArray (GLuint index); +GLAPI void APIENTRY glEnableVertexAttribArray (GLuint index); +GLAPI void APIENTRY glGetActiveAttrib (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +GLAPI void APIENTRY glGetActiveUniform (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +GLAPI void APIENTRY glGetAttachedShaders (GLuint program, GLsizei maxCount, GLsizei *count, GLuint *shaders); +GLAPI GLint APIENTRY glGetAttribLocation (GLuint program, const GLchar *name); +GLAPI void APIENTRY glGetProgramiv (GLuint program, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetProgramInfoLog (GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GLAPI void 
APIENTRY glGetShaderiv (GLuint shader, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetShaderInfoLog (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GLAPI void APIENTRY glGetShaderSource (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *source); +GLAPI GLint APIENTRY glGetUniformLocation (GLuint program, const GLchar *name); +GLAPI void APIENTRY glGetUniformfv (GLuint program, GLint location, GLfloat *params); +GLAPI void APIENTRY glGetUniformiv (GLuint program, GLint location, GLint *params); +GLAPI void APIENTRY glGetVertexAttribdv (GLuint index, GLenum pname, GLdouble *params); +GLAPI void APIENTRY glGetVertexAttribfv (GLuint index, GLenum pname, GLfloat *params); +GLAPI void APIENTRY glGetVertexAttribiv (GLuint index, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetVertexAttribPointerv (GLuint index, GLenum pname, void **pointer); +GLAPI GLboolean APIENTRY glIsProgram (GLuint program); +GLAPI GLboolean APIENTRY glIsShader (GLuint shader); +GLAPI void APIENTRY glLinkProgram (GLuint program); +GLAPI void APIENTRY glShaderSource (GLuint shader, GLsizei count, const GLchar *const*string, const GLint *length); +GLAPI void APIENTRY glUseProgram (GLuint program); +GLAPI void APIENTRY glUniform1f (GLint location, GLfloat v0); +GLAPI void APIENTRY glUniform2f (GLint location, GLfloat v0, GLfloat v1); +GLAPI void APIENTRY glUniform3f (GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +GLAPI void APIENTRY glUniform4f (GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +GLAPI void APIENTRY glUniform1i (GLint location, GLint v0); +GLAPI void APIENTRY glUniform2i (GLint location, GLint v0, GLint v1); +GLAPI void APIENTRY glUniform3i (GLint location, GLint v0, GLint v1, GLint v2); +GLAPI void APIENTRY glUniform4i (GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +GLAPI void APIENTRY glUniform1fv (GLint location, GLsizei count, const GLfloat *value); +GLAPI void APIENTRY glUniform2fv (GLint location, GLsizei 
count, const GLfloat *value); +GLAPI void APIENTRY glUniform3fv (GLint location, GLsizei count, const GLfloat *value); +GLAPI void APIENTRY glUniform4fv (GLint location, GLsizei count, const GLfloat *value); +GLAPI void APIENTRY glUniform1iv (GLint location, GLsizei count, const GLint *value); +GLAPI void APIENTRY glUniform2iv (GLint location, GLsizei count, const GLint *value); +GLAPI void APIENTRY glUniform3iv (GLint location, GLsizei count, const GLint *value); +GLAPI void APIENTRY glUniform4iv (GLint location, GLsizei count, const GLint *value); +GLAPI void APIENTRY glUniformMatrix2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glUniformMatrix3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glUniformMatrix4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glValidateProgram (GLuint program); +GLAPI void APIENTRY glVertexAttrib1d (GLuint index, GLdouble x); +GLAPI void APIENTRY glVertexAttrib1dv (GLuint index, const GLdouble *v); +GLAPI void APIENTRY glVertexAttrib1f (GLuint index, GLfloat x); +GLAPI void APIENTRY glVertexAttrib1fv (GLuint index, const GLfloat *v); +GLAPI void APIENTRY glVertexAttrib1s (GLuint index, GLshort x); +GLAPI void APIENTRY glVertexAttrib1sv (GLuint index, const GLshort *v); +GLAPI void APIENTRY glVertexAttrib2d (GLuint index, GLdouble x, GLdouble y); +GLAPI void APIENTRY glVertexAttrib2dv (GLuint index, const GLdouble *v); +GLAPI void APIENTRY glVertexAttrib2f (GLuint index, GLfloat x, GLfloat y); +GLAPI void APIENTRY glVertexAttrib2fv (GLuint index, const GLfloat *v); +GLAPI void APIENTRY glVertexAttrib2s (GLuint index, GLshort x, GLshort y); +GLAPI void APIENTRY glVertexAttrib2sv (GLuint index, const GLshort *v); +GLAPI void APIENTRY glVertexAttrib3d (GLuint index, GLdouble x, GLdouble y, GLdouble z); +GLAPI void APIENTRY glVertexAttrib3dv (GLuint index, const GLdouble *v); +GLAPI 
void APIENTRY glVertexAttrib3f (GLuint index, GLfloat x, GLfloat y, GLfloat z); +GLAPI void APIENTRY glVertexAttrib3fv (GLuint index, const GLfloat *v); +GLAPI void APIENTRY glVertexAttrib3s (GLuint index, GLshort x, GLshort y, GLshort z); +GLAPI void APIENTRY glVertexAttrib3sv (GLuint index, const GLshort *v); +GLAPI void APIENTRY glVertexAttrib4Nbv (GLuint index, const GLbyte *v); +GLAPI void APIENTRY glVertexAttrib4Niv (GLuint index, const GLint *v); +GLAPI void APIENTRY glVertexAttrib4Nsv (GLuint index, const GLshort *v); +GLAPI void APIENTRY glVertexAttrib4Nub (GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w); +GLAPI void APIENTRY glVertexAttrib4Nubv (GLuint index, const GLubyte *v); +GLAPI void APIENTRY glVertexAttrib4Nuiv (GLuint index, const GLuint *v); +GLAPI void APIENTRY glVertexAttrib4Nusv (GLuint index, const GLushort *v); +GLAPI void APIENTRY glVertexAttrib4bv (GLuint index, const GLbyte *v); +GLAPI void APIENTRY glVertexAttrib4d (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +GLAPI void APIENTRY glVertexAttrib4dv (GLuint index, const GLdouble *v); +GLAPI void APIENTRY glVertexAttrib4f (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GLAPI void APIENTRY glVertexAttrib4fv (GLuint index, const GLfloat *v); +GLAPI void APIENTRY glVertexAttrib4iv (GLuint index, const GLint *v); +GLAPI void APIENTRY glVertexAttrib4s (GLuint index, GLshort x, GLshort y, GLshort z, GLshort w); +GLAPI void APIENTRY glVertexAttrib4sv (GLuint index, const GLshort *v); +GLAPI void APIENTRY glVertexAttrib4ubv (GLuint index, const GLubyte *v); +GLAPI void APIENTRY glVertexAttrib4uiv (GLuint index, const GLuint *v); +GLAPI void APIENTRY glVertexAttrib4usv (GLuint index, const GLushort *v); +GLAPI void APIENTRY glVertexAttribPointer (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void *pointer); +#endif +#endif /* GL_VERSION_2_0 */ + +#ifndef GL_VERSION_2_1 +#define GL_VERSION_2_1 1 +#define 
GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB_ALPHA 0x8C42 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_COMPRESSED_SRGB 0x8C48 +#define GL_COMPRESSED_SRGB_ALPHA 0x8C49 +typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X2FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X2FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glUniformMatrix2x3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glUniformMatrix3x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glUniformMatrix2x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glUniformMatrix4x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glUniformMatrix3x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glUniformMatrix4x3fv (GLint location, 
GLsizei count, GLboolean transpose, const GLfloat *value); +#endif +#endif /* GL_VERSION_2_1 */ + +#ifndef GL_VERSION_3_0 +#define GL_VERSION_3_0 1 +typedef unsigned short GLhalf; +#define GL_COMPARE_REF_TO_TEXTURE 0x884E +#define GL_CLIP_DISTANCE0 0x3000 +#define GL_CLIP_DISTANCE1 0x3001 +#define GL_CLIP_DISTANCE2 0x3002 +#define GL_CLIP_DISTANCE3 0x3003 +#define GL_CLIP_DISTANCE4 0x3004 +#define GL_CLIP_DISTANCE5 0x3005 +#define GL_CLIP_DISTANCE6 0x3006 +#define GL_CLIP_DISTANCE7 0x3007 +#define GL_MAX_CLIP_DISTANCES 0x0D32 +#define GL_MAJOR_VERSION 0x821B +#define GL_MINOR_VERSION 0x821C +#define GL_NUM_EXTENSIONS 0x821D +#define GL_CONTEXT_FLAGS 0x821E +#define GL_COMPRESSED_RED 0x8225 +#define GL_COMPRESSED_RG 0x8226 +#define GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT 0x00000001 +#define GL_RGBA32F 0x8814 +#define GL_RGB32F 0x8815 +#define GL_RGBA16F 0x881A +#define GL_RGB16F 0x881B +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_CLAMP_READ_COLOR 0x891C +#define GL_FIXED_ONLY 0x891D +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#define GL_TEXTURE_1D_ARRAY 0x8C18 +#define GL_PROXY_TEXTURE_1D_ARRAY 0x8C19 +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_PROXY_TEXTURE_2D_ARRAY 0x8C1B +#define GL_TEXTURE_BINDING_1D_ARRAY 0x8C1C +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define GL_R11F_G11F_B10F 0x8C3A +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_RGB9_E5 0x8C3D +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_TEXTURE_SHARED_SIZE 0x8C3F +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_PRIMITIVES_GENERATED 0x8C87 +#define 
GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_RGBA32UI 0x8D70 +#define GL_RGB32UI 0x8D71 +#define GL_RGBA16UI 0x8D76 +#define GL_RGB16UI 0x8D77 +#define GL_RGBA8UI 0x8D7C +#define GL_RGB8UI 0x8D7D +#define GL_RGBA32I 0x8D82 +#define GL_RGB32I 0x8D83 +#define GL_RGBA16I 0x8D88 +#define GL_RGB16I 0x8D89 +#define GL_RGBA8I 0x8D8E +#define GL_RGB8I 0x8D8F +#define GL_RED_INTEGER 0x8D94 +#define GL_GREEN_INTEGER 0x8D95 +#define GL_BLUE_INTEGER 0x8D96 +#define GL_RGB_INTEGER 0x8D98 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_BGR_INTEGER 0x8D9A +#define GL_BGRA_INTEGER 0x8D9B +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_QUERY_WAIT 0x8E13 +#define GL_QUERY_NO_WAIT 0x8E14 +#define GL_QUERY_BY_REGION_WAIT 0x8E15 +#define GL_QUERY_BY_REGION_NO_WAIT 0x8E16 +#define GL_BUFFER_ACCESS_FLAGS 0x911F +#define GL_BUFFER_MAP_LENGTH 0x9120 +#define GL_BUFFER_MAP_OFFSET 0x9121 +#define 
GL_DEPTH_COMPONENT32F 0x8CAC +#define GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_DEPTH_STENCIL 0x84F9 +#define GL_UNSIGNED_INT_24_8 0x84FA +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_TEXTURE_STENCIL_SIZE 0x88F1 +#define GL_TEXTURE_RED_TYPE 0x8C10 +#define GL_TEXTURE_GREEN_TYPE 0x8C11 +#define GL_TEXTURE_BLUE_TYPE 0x8C12 +#define GL_TEXTURE_ALPHA_TYPE 0x8C13 +#define GL_TEXTURE_DEPTH_TYPE 0x8C16 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER 0x8CDB +#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER 0x8CDC +#define GL_FRAMEBUFFER_UNSUPPORTED 
0x8CDD +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_STENCIL_INDEX1 0x8D46 +#define GL_STENCIL_INDEX4 0x8D47 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_STENCIL_INDEX16 0x8D49 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_MAX_SAMPLES 0x8D57 +#define GL_FRAMEBUFFER_SRGB 0x8DB9 +#define GL_HALF_FLOAT 0x140B +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 +#define GL_COMPRESSED_RED_RGTC1 0x8DBB +#define GL_COMPRESSED_SIGNED_RED_RGTC1 0x8DBC +#define GL_COMPRESSED_RG_RGTC2 0x8DBD +#define GL_COMPRESSED_SIGNED_RG_RGTC2 0x8DBE +#define GL_RG 0x8227 +#define GL_RG_INTEGER 0x8228 +#define GL_R8 0x8229 +#define GL_R16 0x822A +#define GL_RG8 0x822B +#define GL_RG16 
0x822C +#define GL_R16F 0x822D +#define GL_R32F 0x822E +#define GL_RG16F 0x822F +#define GL_RG32F 0x8230 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R16I 0x8233 +#define GL_R16UI 0x8234 +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C +#define GL_VERTEX_ARRAY_BINDING 0x85B5 +typedef void (APIENTRYP PFNGLCOLORMASKIPROC) (GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a); +typedef void (APIENTRYP PFNGLGETBOOLEANI_VPROC) (GLenum target, GLuint index, GLboolean *data); +typedef void (APIENTRYP PFNGLGETINTEGERI_VPROC) (GLenum target, GLuint index, GLint *data); +typedef void (APIENTRYP PFNGLENABLEIPROC) (GLenum target, GLuint index); +typedef void (APIENTRYP PFNGLDISABLEIPROC) (GLenum target, GLuint index); +typedef GLboolean (APIENTRYP PFNGLISENABLEDIPROC) (GLenum target, GLuint index); +typedef void (APIENTRYP PFNGLBEGINTRANSFORMFEEDBACKPROC) (GLenum primitiveMode); +typedef void (APIENTRYP PFNGLENDTRANSFORMFEEDBACKPROC) (void); +typedef void (APIENTRYP PFNGLBINDBUFFERRANGEPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +typedef void (APIENTRYP PFNGLBINDBUFFERBASEPROC) (GLenum target, GLuint index, GLuint buffer); +typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKVARYINGSPROC) (GLuint program, GLsizei count, const GLchar *const*varyings, GLenum bufferMode); +typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKVARYINGPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name); +typedef void (APIENTRYP PFNGLCLAMPCOLORPROC) (GLenum target, GLenum clamp); +typedef void (APIENTRYP PFNGLBEGINCONDITIONALRENDERPROC) (GLuint id, GLenum mode); +typedef void (APIENTRYP PFNGLENDCONDITIONALRENDERPROC) (void); +typedef void (APIENTRYP PFNGLVERTEXATTRIBIPOINTERPROC) (GLuint index, GLint size, GLenum type, GLsizei 
stride, const void *pointer); +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIIVPROC) (GLuint index, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIUIVPROC) (GLuint index, GLenum pname, GLuint *params); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IPROC) (GLuint index, GLint x); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IPROC) (GLuint index, GLint x, GLint y); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IPROC) (GLuint index, GLint x, GLint y, GLint z); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IPROC) (GLuint index, GLint x, GLint y, GLint z, GLint w); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIPROC) (GLuint index, GLuint x); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIPROC) (GLuint index, GLuint x, GLuint y); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIPROC) (GLuint index, GLuint x, GLuint y, GLuint z); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIPROC) (GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IVPROC) (GLuint index, const GLint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IVPROC) (GLuint index, const GLint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IVPROC) (GLuint index, const GLint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IVPROC) (GLuint index, const GLint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIVPROC) (GLuint index, const GLuint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIVPROC) (GLuint index, const GLuint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIVPROC) (GLuint index, const GLuint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIVPROC) (GLuint index, const GLuint *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4BVPROC) (GLuint index, const GLbyte *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4SVPROC) (GLuint index, const GLshort *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UBVPROC) (GLuint index, const GLubyte *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4USVPROC) (GLuint index, const GLushort *v); +typedef void 
(APIENTRYP PFNGLGETUNIFORMUIVPROC) (GLuint program, GLint location, GLuint *params); +typedef void (APIENTRYP PFNGLBINDFRAGDATALOCATIONPROC) (GLuint program, GLuint color, const GLchar *name); +typedef GLint (APIENTRYP PFNGLGETFRAGDATALOCATIONPROC) (GLuint program, const GLchar *name); +typedef void (APIENTRYP PFNGLUNIFORM1UIPROC) (GLint location, GLuint v0); +typedef void (APIENTRYP PFNGLUNIFORM2UIPROC) (GLint location, GLuint v0, GLuint v1); +typedef void (APIENTRYP PFNGLUNIFORM3UIPROC) (GLint location, GLuint v0, GLuint v1, GLuint v2); +typedef void (APIENTRYP PFNGLUNIFORM4UIPROC) (GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +typedef void (APIENTRYP PFNGLUNIFORM1UIVPROC) (GLint location, GLsizei count, const GLuint *value); +typedef void (APIENTRYP PFNGLUNIFORM2UIVPROC) (GLint location, GLsizei count, const GLuint *value); +typedef void (APIENTRYP PFNGLUNIFORM3UIVPROC) (GLint location, GLsizei count, const GLuint *value); +typedef void (APIENTRYP PFNGLUNIFORM4UIVPROC) (GLint location, GLsizei count, const GLuint *value); +typedef void (APIENTRYP PFNGLTEXPARAMETERIIVPROC) (GLenum target, GLenum pname, const GLint *params); +typedef void (APIENTRYP PFNGLTEXPARAMETERIUIVPROC) (GLenum target, GLenum pname, const GLuint *params); +typedef void (APIENTRYP PFNGLGETTEXPARAMETERIIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETTEXPARAMETERIUIVPROC) (GLenum target, GLenum pname, GLuint *params); +typedef void (APIENTRYP PFNGLCLEARBUFFERIVPROC) (GLenum buffer, GLint drawbuffer, const GLint *value); +typedef void (APIENTRYP PFNGLCLEARBUFFERUIVPROC) (GLenum buffer, GLint drawbuffer, const GLuint *value); +typedef void (APIENTRYP PFNGLCLEARBUFFERFVPROC) (GLenum buffer, GLint drawbuffer, const GLfloat *value); +typedef void (APIENTRYP PFNGLCLEARBUFFERFIPROC) (GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +typedef const GLubyte *(APIENTRYP PFNGLGETSTRINGIPROC) (GLenum name, GLuint index); +typedef 
GLboolean (APIENTRYP PFNGLISRENDERBUFFERPROC) (GLuint renderbuffer); +typedef void (APIENTRYP PFNGLBINDRENDERBUFFERPROC) (GLenum target, GLuint renderbuffer); +typedef void (APIENTRYP PFNGLDELETERENDERBUFFERSPROC) (GLsizei n, const GLuint *renderbuffers); +typedef void (APIENTRYP PFNGLGENRENDERBUFFERSPROC) (GLsizei n, GLuint *renderbuffers); +typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef GLboolean (APIENTRYP PFNGLISFRAMEBUFFERPROC) (GLuint framebuffer); +typedef void (APIENTRYP PFNGLBINDFRAMEBUFFERPROC) (GLenum target, GLuint framebuffer); +typedef void (APIENTRYP PFNGLDELETEFRAMEBUFFERSPROC) (GLsizei n, const GLuint *framebuffers); +typedef void (APIENTRYP PFNGLGENFRAMEBUFFERSPROC) (GLsizei n, GLuint *framebuffers); +typedef GLenum (APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSPROC) (GLenum target); +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE1DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +typedef void (APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFERPROC) (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC) (GLenum target, GLenum attachment, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGENERATEMIPMAPPROC) (GLenum target); +typedef void (APIENTRYP PFNGLBLITFRAMEBUFFERPROC) (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +typedef void (APIENTRYP 
PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURELAYERPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +typedef void *(APIENTRYP PFNGLMAPBUFFERRANGEPROC) (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +typedef void (APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEPROC) (GLenum target, GLintptr offset, GLsizeiptr length); +typedef void (APIENTRYP PFNGLBINDVERTEXARRAYPROC) (GLuint array); +typedef void (APIENTRYP PFNGLDELETEVERTEXARRAYSPROC) (GLsizei n, const GLuint *arrays); +typedef void (APIENTRYP PFNGLGENVERTEXARRAYSPROC) (GLsizei n, GLuint *arrays); +typedef GLboolean (APIENTRYP PFNGLISVERTEXARRAYPROC) (GLuint array); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glColorMaski (GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a); +GLAPI void APIENTRY glGetBooleani_v (GLenum target, GLuint index, GLboolean *data); +GLAPI void APIENTRY glGetIntegeri_v (GLenum target, GLuint index, GLint *data); +GLAPI void APIENTRY glEnablei (GLenum target, GLuint index); +GLAPI void APIENTRY glDisablei (GLenum target, GLuint index); +GLAPI GLboolean APIENTRY glIsEnabledi (GLenum target, GLuint index); +GLAPI void APIENTRY glBeginTransformFeedback (GLenum primitiveMode); +GLAPI void APIENTRY glEndTransformFeedback (void); +GLAPI void APIENTRY glBindBufferRange (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +GLAPI void APIENTRY glBindBufferBase (GLenum target, GLuint index, GLuint buffer); +GLAPI void APIENTRY glTransformFeedbackVaryings (GLuint program, GLsizei count, const GLchar *const*varyings, GLenum bufferMode); +GLAPI void APIENTRY glGetTransformFeedbackVarying (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name); +GLAPI void APIENTRY glClampColor (GLenum target, GLenum clamp); +GLAPI void APIENTRY 
glBeginConditionalRender (GLuint id, GLenum mode); +GLAPI void APIENTRY glEndConditionalRender (void); +GLAPI void APIENTRY glVertexAttribIPointer (GLuint index, GLint size, GLenum type, GLsizei stride, const void *pointer); +GLAPI void APIENTRY glGetVertexAttribIiv (GLuint index, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetVertexAttribIuiv (GLuint index, GLenum pname, GLuint *params); +GLAPI void APIENTRY glVertexAttribI1i (GLuint index, GLint x); +GLAPI void APIENTRY glVertexAttribI2i (GLuint index, GLint x, GLint y); +GLAPI void APIENTRY glVertexAttribI3i (GLuint index, GLint x, GLint y, GLint z); +GLAPI void APIENTRY glVertexAttribI4i (GLuint index, GLint x, GLint y, GLint z, GLint w); +GLAPI void APIENTRY glVertexAttribI1ui (GLuint index, GLuint x); +GLAPI void APIENTRY glVertexAttribI2ui (GLuint index, GLuint x, GLuint y); +GLAPI void APIENTRY glVertexAttribI3ui (GLuint index, GLuint x, GLuint y, GLuint z); +GLAPI void APIENTRY glVertexAttribI4ui (GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +GLAPI void APIENTRY glVertexAttribI1iv (GLuint index, const GLint *v); +GLAPI void APIENTRY glVertexAttribI2iv (GLuint index, const GLint *v); +GLAPI void APIENTRY glVertexAttribI3iv (GLuint index, const GLint *v); +GLAPI void APIENTRY glVertexAttribI4iv (GLuint index, const GLint *v); +GLAPI void APIENTRY glVertexAttribI1uiv (GLuint index, const GLuint *v); +GLAPI void APIENTRY glVertexAttribI2uiv (GLuint index, const GLuint *v); +GLAPI void APIENTRY glVertexAttribI3uiv (GLuint index, const GLuint *v); +GLAPI void APIENTRY glVertexAttribI4uiv (GLuint index, const GLuint *v); +GLAPI void APIENTRY glVertexAttribI4bv (GLuint index, const GLbyte *v); +GLAPI void APIENTRY glVertexAttribI4sv (GLuint index, const GLshort *v); +GLAPI void APIENTRY glVertexAttribI4ubv (GLuint index, const GLubyte *v); +GLAPI void APIENTRY glVertexAttribI4usv (GLuint index, const GLushort *v); +GLAPI void APIENTRY glGetUniformuiv (GLuint program, GLint location, GLuint 
*params); +GLAPI void APIENTRY glBindFragDataLocation (GLuint program, GLuint color, const GLchar *name); +GLAPI GLint APIENTRY glGetFragDataLocation (GLuint program, const GLchar *name); +GLAPI void APIENTRY glUniform1ui (GLint location, GLuint v0); +GLAPI void APIENTRY glUniform2ui (GLint location, GLuint v0, GLuint v1); +GLAPI void APIENTRY glUniform3ui (GLint location, GLuint v0, GLuint v1, GLuint v2); +GLAPI void APIENTRY glUniform4ui (GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +GLAPI void APIENTRY glUniform1uiv (GLint location, GLsizei count, const GLuint *value); +GLAPI void APIENTRY glUniform2uiv (GLint location, GLsizei count, const GLuint *value); +GLAPI void APIENTRY glUniform3uiv (GLint location, GLsizei count, const GLuint *value); +GLAPI void APIENTRY glUniform4uiv (GLint location, GLsizei count, const GLuint *value); +GLAPI void APIENTRY glTexParameterIiv (GLenum target, GLenum pname, const GLint *params); +GLAPI void APIENTRY glTexParameterIuiv (GLenum target, GLenum pname, const GLuint *params); +GLAPI void APIENTRY glGetTexParameterIiv (GLenum target, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetTexParameterIuiv (GLenum target, GLenum pname, GLuint *params); +GLAPI void APIENTRY glClearBufferiv (GLenum buffer, GLint drawbuffer, const GLint *value); +GLAPI void APIENTRY glClearBufferuiv (GLenum buffer, GLint drawbuffer, const GLuint *value); +GLAPI void APIENTRY glClearBufferfv (GLenum buffer, GLint drawbuffer, const GLfloat *value); +GLAPI void APIENTRY glClearBufferfi (GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +GLAPI const GLubyte *APIENTRY glGetStringi (GLenum name, GLuint index); +GLAPI GLboolean APIENTRY glIsRenderbuffer (GLuint renderbuffer); +GLAPI void APIENTRY glBindRenderbuffer (GLenum target, GLuint renderbuffer); +GLAPI void APIENTRY glDeleteRenderbuffers (GLsizei n, const GLuint *renderbuffers); +GLAPI void APIENTRY glGenRenderbuffers (GLsizei n, GLuint *renderbuffers); +GLAPI void 
APIENTRY glRenderbufferStorage (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glGetRenderbufferParameteriv (GLenum target, GLenum pname, GLint *params); +GLAPI GLboolean APIENTRY glIsFramebuffer (GLuint framebuffer); +GLAPI void APIENTRY glBindFramebuffer (GLenum target, GLuint framebuffer); +GLAPI void APIENTRY glDeleteFramebuffers (GLsizei n, const GLuint *framebuffers); +GLAPI void APIENTRY glGenFramebuffers (GLsizei n, GLuint *framebuffers); +GLAPI GLenum APIENTRY glCheckFramebufferStatus (GLenum target); +GLAPI void APIENTRY glFramebufferTexture1D (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GLAPI void APIENTRY glFramebufferTexture2D (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GLAPI void APIENTRY glFramebufferTexture3D (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +GLAPI void APIENTRY glFramebufferRenderbuffer (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GLAPI void APIENTRY glGetFramebufferAttachmentParameteriv (GLenum target, GLenum attachment, GLenum pname, GLint *params); +GLAPI void APIENTRY glGenerateMipmap (GLenum target); +GLAPI void APIENTRY glBlitFramebuffer (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +GLAPI void APIENTRY glRenderbufferStorageMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glFramebufferTextureLayer (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +GLAPI void *APIENTRY glMapBufferRange (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GLAPI void APIENTRY glFlushMappedBufferRange (GLenum target, GLintptr offset, GLsizeiptr length); +GLAPI void APIENTRY glBindVertexArray (GLuint array); +GLAPI void APIENTRY 
glDeleteVertexArrays (GLsizei n, const GLuint *arrays); +GLAPI void APIENTRY glGenVertexArrays (GLsizei n, GLuint *arrays); +GLAPI GLboolean APIENTRY glIsVertexArray (GLuint array); +#endif +#endif /* GL_VERSION_3_0 */ + +#ifndef GL_VERSION_3_1 +#define GL_VERSION_3_1 1 +#define GL_SAMPLER_2D_RECT 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64 +#define GL_SAMPLER_BUFFER 0x8DC2 +#define GL_INT_SAMPLER_2D_RECT 0x8DCD +#define GL_INT_SAMPLER_BUFFER 0x8DD0 +#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8 +#define GL_TEXTURE_BUFFER 0x8C2A +#define GL_MAX_TEXTURE_BUFFER_SIZE 0x8C2B +#define GL_TEXTURE_BINDING_BUFFER 0x8C2C +#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING 0x8C2D +#define GL_TEXTURE_RECTANGLE 0x84F5 +#define GL_TEXTURE_BINDING_RECTANGLE 0x84F6 +#define GL_PROXY_TEXTURE_RECTANGLE 0x84F7 +#define GL_MAX_RECTANGLE_TEXTURE_SIZE 0x84F8 +#define GL_R8_SNORM 0x8F94 +#define GL_RG8_SNORM 0x8F95 +#define GL_RGB8_SNORM 0x8F96 +#define GL_RGBA8_SNORM 0x8F97 +#define GL_R16_SNORM 0x8F98 +#define GL_RG16_SNORM 0x8F99 +#define GL_RGB16_SNORM 0x8F9A +#define GL_RGBA16_SNORM 0x8F9B +#define GL_SIGNED_NORMALIZED 0x8F9C +#define GL_PRIMITIVE_RESTART 0x8F9D +#define GL_PRIMITIVE_RESTART_INDEX 0x8F9E +#define GL_COPY_READ_BUFFER 0x8F36 +#define GL_COPY_WRITE_BUFFER 0x8F37 +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_GEOMETRY_UNIFORM_BLOCKS 0x8A2C +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS 0x8A32 +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define 
GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER 0x8A45 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_INVALID_INDEX 0xFFFFFFFFu +typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC) (GLenum mode, GLint first, GLsizei count, GLsizei instancecount); +typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount); +typedef void (APIENTRYP PFNGLTEXBUFFERPROC) (GLenum target, GLenum internalformat, GLuint buffer); +typedef void (APIENTRYP PFNGLPRIMITIVERESTARTINDEXPROC) (GLuint index); +typedef void (APIENTRYP PFNGLCOPYBUFFERSUBDATAPROC) (GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +typedef void (APIENTRYP PFNGLGETUNIFORMINDICESPROC) (GLuint program, GLsizei uniformCount, const GLchar *const*uniformNames, GLuint *uniformIndices); +typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMSIVPROC) (GLuint program, GLsizei uniformCount, const GLuint *uniformIndices, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMNAMEPROC) (GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformName); +typedef GLuint (APIENTRYP PFNGLGETUNIFORMBLOCKINDEXPROC) (GLuint program, const GLchar *uniformBlockName); 
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKIVPROC) (GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC) (GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformBlockName); +typedef void (APIENTRYP PFNGLUNIFORMBLOCKBINDINGPROC) (GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glDrawArraysInstanced (GLenum mode, GLint first, GLsizei count, GLsizei instancecount); +GLAPI void APIENTRY glDrawElementsInstanced (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount); +GLAPI void APIENTRY glTexBuffer (GLenum target, GLenum internalformat, GLuint buffer); +GLAPI void APIENTRY glPrimitiveRestartIndex (GLuint index); +GLAPI void APIENTRY glCopyBufferSubData (GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +GLAPI void APIENTRY glGetUniformIndices (GLuint program, GLsizei uniformCount, const GLchar *const*uniformNames, GLuint *uniformIndices); +GLAPI void APIENTRY glGetActiveUniformsiv (GLuint program, GLsizei uniformCount, const GLuint *uniformIndices, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetActiveUniformName (GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformName); +GLAPI GLuint APIENTRY glGetUniformBlockIndex (GLuint program, const GLchar *uniformBlockName); +GLAPI void APIENTRY glGetActiveUniformBlockiv (GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetActiveUniformBlockName (GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformBlockName); +GLAPI void APIENTRY glUniformBlockBinding (GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +#endif +#endif /* GL_VERSION_3_1 */ + +#ifndef GL_VERSION_3_2 +#define GL_VERSION_3_2 1 +typedef struct 
__GLsync *GLsync; +#ifndef GLEXT_64_TYPES_DEFINED +/* This code block is duplicated in glxext.h, so must be protected */ +#define GLEXT_64_TYPES_DEFINED +/* Define int32_t, int64_t, and uint64_t types for UST/MSC */ +/* (as used in the GL_EXT_timer_query extension). */ +/* Select a provider of the fixed-width integer types for the current toolchain: a system header where one is named below, explicit typedefs otherwise. */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#include <inttypes.h> +#elif defined(__sun__) || defined(__digital__) +#include <inttypes.h> +#if defined(__STDC__) +#if defined(__arch64__) || defined(_LP64) +typedef long int int64_t; +typedef unsigned long int uint64_t; +#else +typedef long long int int64_t; +typedef unsigned long long int uint64_t; +#endif /* __arch64__ */ +#endif /* __STDC__ */ +#elif defined( __VMS ) || defined(__sgi) +#include <inttypes.h> +#elif defined(__SCO__) || defined(__USLC__) +#include <stdint.h> +#elif defined(__UNIXOS2__) || defined(__SOL64__) +typedef long int int32_t; +typedef long long int int64_t; +typedef unsigned long long int uint64_t; +#elif defined(_WIN32) && defined(__GNUC__) +#include <stdint.h> +#elif defined(_WIN32) +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +/* Fallback if nothing above works */ +#include <inttypes.h> +#endif +#endif +typedef uint64_t GLuint64; +typedef int64_t GLint64; +#define GL_CONTEXT_CORE_PROFILE_BIT 0x00000001 +#define GL_CONTEXT_COMPATIBILITY_PROFILE_BIT 0x00000002 +#define GL_LINES_ADJACENCY 0x000A +#define GL_LINE_STRIP_ADJACENCY 0x000B +#define GL_TRIANGLES_ADJACENCY 0x000C +#define GL_TRIANGLE_STRIP_ADJACENCY 0x000D +#define GL_PROGRAM_POINT_SIZE 0x8642 +#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS 0x8C29 +#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED 0x8DA7 +#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS 0x8DA8 +#define GL_GEOMETRY_SHADER 0x8DD9 +#define GL_GEOMETRY_VERTICES_OUT 0x8916 +#define GL_GEOMETRY_INPUT_TYPE 0x8917 +#define GL_GEOMETRY_OUTPUT_TYPE 0x8918 +#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS 0x8DDF +#define GL_MAX_GEOMETRY_OUTPUT_VERTICES 0x8DE0 +#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS 0x8DE1 +#define 
GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#define GL_MAX_GEOMETRY_INPUT_COMPONENTS 0x9123 +#define GL_MAX_GEOMETRY_OUTPUT_COMPONENTS 0x9124 +#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125 +#define GL_CONTEXT_PROFILE_MASK 0x9126 +#define GL_DEPTH_CLAMP 0x864F +#define GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION 0x8E4C +#define GL_FIRST_VERTEX_CONVENTION 0x8E4D +#define GL_LAST_VERTEX_CONVENTION 0x8E4E +#define GL_PROVOKING_VERTEX 0x8E4F +#define GL_TEXTURE_CUBE_MAP_SEAMLESS 0x884F +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 +#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_SAMPLE_POSITION 0x8E50 +#define GL_SAMPLE_MASK 0x8E51 +#define GL_SAMPLE_MASK_VALUE 0x8E52 +#define GL_MAX_SAMPLE_MASK_WORDS 0x8E59 +#define GL_TEXTURE_2D_MULTISAMPLE 0x9100 +#define GL_PROXY_TEXTURE_2D_MULTISAMPLE 0x9101 +#define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102 +#define GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9103 +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE 0x9104 +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY 0x9105 +#define GL_TEXTURE_SAMPLES 0x9106 +#define GL_TEXTURE_FIXED_SAMPLE_LOCATIONS 0x9107 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A +#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B +#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D +#define GL_MAX_COLOR_TEXTURE_SAMPLES 0x910E +#define GL_MAX_DEPTH_TEXTURE_SAMPLES 0x910F +#define GL_MAX_INTEGER_SAMPLES 0x9110 +typedef void 
(APIENTRYP PFNGLDRAWELEMENTSBASEVERTEXPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLint basevertex); +typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices, GLint basevertex); +typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLint basevertex); +typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC) (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount, const GLint *basevertex); +typedef void (APIENTRYP PFNGLPROVOKINGVERTEXPROC) (GLenum mode); +typedef GLsync (APIENTRYP PFNGLFENCESYNCPROC) (GLenum condition, GLbitfield flags); +typedef GLboolean (APIENTRYP PFNGLISSYNCPROC) (GLsync sync); +typedef void (APIENTRYP PFNGLDELETESYNCPROC) (GLsync sync); +typedef GLenum (APIENTRYP PFNGLCLIENTWAITSYNCPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (APIENTRYP PFNGLWAITSYNCPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (APIENTRYP PFNGLGETINTEGER64VPROC) (GLenum pname, GLint64 *data); +typedef void (APIENTRYP PFNGLGETSYNCIVPROC) (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +typedef void (APIENTRYP PFNGLGETINTEGER64I_VPROC) (GLenum target, GLuint index, GLint64 *data); +typedef void (APIENTRYP PFNGLGETBUFFERPARAMETERI64VPROC) (GLenum target, GLenum pname, GLint64 *params); +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level); +typedef void (APIENTRYP PFNGLTEXIMAGE2DMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +typedef void (APIENTRYP PFNGLTEXIMAGE3DMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean 
fixedsamplelocations); +typedef void (APIENTRYP PFNGLGETMULTISAMPLEFVPROC) (GLenum pname, GLuint index, GLfloat *val); +typedef void (APIENTRYP PFNGLSAMPLEMASKIPROC) (GLuint maskNumber, GLbitfield mask); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glDrawElementsBaseVertex (GLenum mode, GLsizei count, GLenum type, const void *indices, GLint basevertex); +GLAPI void APIENTRY glDrawRangeElementsBaseVertex (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices, GLint basevertex); +GLAPI void APIENTRY glDrawElementsInstancedBaseVertex (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLint basevertex); +GLAPI void APIENTRY glMultiDrawElementsBaseVertex (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount, const GLint *basevertex); +GLAPI void APIENTRY glProvokingVertex (GLenum mode); +GLAPI GLsync APIENTRY glFenceSync (GLenum condition, GLbitfield flags); +GLAPI GLboolean APIENTRY glIsSync (GLsync sync); +GLAPI void APIENTRY glDeleteSync (GLsync sync); +GLAPI GLenum APIENTRY glClientWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GLAPI void APIENTRY glWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GLAPI void APIENTRY glGetInteger64v (GLenum pname, GLint64 *data); +GLAPI void APIENTRY glGetSynciv (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +GLAPI void APIENTRY glGetInteger64i_v (GLenum target, GLuint index, GLint64 *data); +GLAPI void APIENTRY glGetBufferParameteri64v (GLenum target, GLenum pname, GLint64 *params); +GLAPI void APIENTRY glFramebufferTexture (GLenum target, GLenum attachment, GLuint texture, GLint level); +GLAPI void APIENTRY glTexImage2DMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +GLAPI void APIENTRY glTexImage3DMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, 
GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); +GLAPI void APIENTRY glGetMultisamplefv (GLenum pname, GLuint index, GLfloat *val); +GLAPI void APIENTRY glSampleMaski (GLuint maskNumber, GLbitfield mask); +#endif +#endif /* GL_VERSION_3_2 */ + +#ifndef GL_VERSION_3_3 +#define GL_VERSION_3_3 1 +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE +#define GL_SRC1_COLOR 0x88F9 +#define GL_ONE_MINUS_SRC1_COLOR 0x88FA +#define GL_ONE_MINUS_SRC1_ALPHA 0x88FB +#define GL_MAX_DUAL_SOURCE_DRAW_BUFFERS 0x88FC +#define GL_ANY_SAMPLES_PASSED 0x8C2F +#define GL_SAMPLER_BINDING 0x8919 +#define GL_RGB10_A2UI 0x906F +#define GL_TEXTURE_SWIZZLE_R 0x8E42 +#define GL_TEXTURE_SWIZZLE_G 0x8E43 +#define GL_TEXTURE_SWIZZLE_B 0x8E44 +#define GL_TEXTURE_SWIZZLE_A 0x8E45 +#define GL_TEXTURE_SWIZZLE_RGBA 0x8E46 +#define GL_TIME_ELAPSED 0x88BF +#define GL_TIMESTAMP 0x8E28 +#define GL_INT_2_10_10_10_REV 0x8D9F +typedef void (APIENTRYP PFNGLBINDFRAGDATALOCATIONINDEXEDPROC) (GLuint program, GLuint colorNumber, GLuint index, const GLchar *name); +typedef GLint (APIENTRYP PFNGLGETFRAGDATAINDEXPROC) (GLuint program, const GLchar *name); +typedef void (APIENTRYP PFNGLGENSAMPLERSPROC) (GLsizei count, GLuint *samplers); +typedef void (APIENTRYP PFNGLDELETESAMPLERSPROC) (GLsizei count, const GLuint *samplers); +typedef GLboolean (APIENTRYP PFNGLISSAMPLERPROC) (GLuint sampler); +typedef void (APIENTRYP PFNGLBINDSAMPLERPROC) (GLuint unit, GLuint sampler); +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIPROC) (GLuint sampler, GLenum pname, GLint param); +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIVPROC) (GLuint sampler, GLenum pname, const GLint *param); +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERFPROC) (GLuint sampler, GLenum pname, GLfloat param); +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERFVPROC) (GLuint sampler, GLenum pname, const GLfloat *param); +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIIVPROC) (GLuint sampler, GLenum pname, const GLint *param); +typedef void (APIENTRYP 
PFNGLSAMPLERPARAMETERIUIVPROC) (GLuint sampler, GLenum pname, const GLuint *param); +typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIVPROC) (GLuint sampler, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIIVPROC) (GLuint sampler, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERFVPROC) (GLuint sampler, GLenum pname, GLfloat *params); +typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIUIVPROC) (GLuint sampler, GLenum pname, GLuint *params); +typedef void (APIENTRYP PFNGLQUERYCOUNTERPROC) (GLuint id, GLenum target); +typedef void (APIENTRYP PFNGLGETQUERYOBJECTI64VPROC) (GLuint id, GLenum pname, GLint64 *params); +typedef void (APIENTRYP PFNGLGETQUERYOBJECTUI64VPROC) (GLuint id, GLenum pname, GLuint64 *params); +typedef void (APIENTRYP PFNGLVERTEXATTRIBDIVISORPROC) (GLuint index, GLuint divisor); +typedef void (APIENTRYP PFNGLVERTEXATTRIBP1UIPROC) (GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (APIENTRYP PFNGLVERTEXATTRIBP1UIVPROC) (GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +typedef void (APIENTRYP PFNGLVERTEXATTRIBP2UIPROC) (GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (APIENTRYP PFNGLVERTEXATTRIBP2UIVPROC) (GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +typedef void (APIENTRYP PFNGLVERTEXATTRIBP3UIPROC) (GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (APIENTRYP PFNGLVERTEXATTRIBP3UIVPROC) (GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +typedef void (APIENTRYP PFNGLVERTEXATTRIBP4UIPROC) (GLuint index, GLenum type, GLboolean normalized, GLuint value); +typedef void (APIENTRYP PFNGLVERTEXATTRIBP4UIVPROC) (GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glBindFragDataLocationIndexed (GLuint program, GLuint colorNumber, GLuint index, const GLchar *name); +GLAPI GLint 
APIENTRY glGetFragDataIndex (GLuint program, const GLchar *name); +GLAPI void APIENTRY glGenSamplers (GLsizei count, GLuint *samplers); +GLAPI void APIENTRY glDeleteSamplers (GLsizei count, const GLuint *samplers); +GLAPI GLboolean APIENTRY glIsSampler (GLuint sampler); +GLAPI void APIENTRY glBindSampler (GLuint unit, GLuint sampler); +GLAPI void APIENTRY glSamplerParameteri (GLuint sampler, GLenum pname, GLint param); +GLAPI void APIENTRY glSamplerParameteriv (GLuint sampler, GLenum pname, const GLint *param); +GLAPI void APIENTRY glSamplerParameterf (GLuint sampler, GLenum pname, GLfloat param); +GLAPI void APIENTRY glSamplerParameterfv (GLuint sampler, GLenum pname, const GLfloat *param); +GLAPI void APIENTRY glSamplerParameterIiv (GLuint sampler, GLenum pname, const GLint *param); +GLAPI void APIENTRY glSamplerParameterIuiv (GLuint sampler, GLenum pname, const GLuint *param); +GLAPI void APIENTRY glGetSamplerParameteriv (GLuint sampler, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetSamplerParameterIiv (GLuint sampler, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetSamplerParameterfv (GLuint sampler, GLenum pname, GLfloat *params); +GLAPI void APIENTRY glGetSamplerParameterIuiv (GLuint sampler, GLenum pname, GLuint *params); +GLAPI void APIENTRY glQueryCounter (GLuint id, GLenum target); +GLAPI void APIENTRY glGetQueryObjecti64v (GLuint id, GLenum pname, GLint64 *params); +GLAPI void APIENTRY glGetQueryObjectui64v (GLuint id, GLenum pname, GLuint64 *params); +GLAPI void APIENTRY glVertexAttribDivisor (GLuint index, GLuint divisor); +GLAPI void APIENTRY glVertexAttribP1ui (GLuint index, GLenum type, GLboolean normalized, GLuint value); +GLAPI void APIENTRY glVertexAttribP1uiv (GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +GLAPI void APIENTRY glVertexAttribP2ui (GLuint index, GLenum type, GLboolean normalized, GLuint value); +GLAPI void APIENTRY glVertexAttribP2uiv (GLuint index, GLenum type, GLboolean normalized, const 
GLuint *value); +GLAPI void APIENTRY glVertexAttribP3ui (GLuint index, GLenum type, GLboolean normalized, GLuint value); +GLAPI void APIENTRY glVertexAttribP3uiv (GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +GLAPI void APIENTRY glVertexAttribP4ui (GLuint index, GLenum type, GLboolean normalized, GLuint value); +GLAPI void APIENTRY glVertexAttribP4uiv (GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +#endif +#endif /* GL_VERSION_3_3 */ + +#ifndef GL_VERSION_4_0 +#define GL_VERSION_4_0 1 +#define GL_SAMPLE_SHADING 0x8C36 +#define GL_MIN_SAMPLE_SHADING_VALUE 0x8C37 +#define GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5E +#define GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5F +#define GL_TEXTURE_CUBE_MAP_ARRAY 0x9009 +#define GL_TEXTURE_BINDING_CUBE_MAP_ARRAY 0x900A +#define GL_PROXY_TEXTURE_CUBE_MAP_ARRAY 0x900B +#define GL_SAMPLER_CUBE_MAP_ARRAY 0x900C +#define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW 0x900D +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY 0x900E +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY 0x900F +#define GL_DRAW_INDIRECT_BUFFER 0x8F3F +#define GL_DRAW_INDIRECT_BUFFER_BINDING 0x8F43 +#define GL_GEOMETRY_SHADER_INVOCATIONS 0x887F +#define GL_MAX_GEOMETRY_SHADER_INVOCATIONS 0x8E5A +#define GL_MIN_FRAGMENT_INTERPOLATION_OFFSET 0x8E5B +#define GL_MAX_FRAGMENT_INTERPOLATION_OFFSET 0x8E5C +#define GL_FRAGMENT_INTERPOLATION_OFFSET_BITS 0x8E5D +#define GL_MAX_VERTEX_STREAMS 0x8E71 +#define GL_DOUBLE_VEC2 0x8FFC +#define GL_DOUBLE_VEC3 0x8FFD +#define GL_DOUBLE_VEC4 0x8FFE +#define GL_DOUBLE_MAT2 0x8F46 +#define GL_DOUBLE_MAT3 0x8F47 +#define GL_DOUBLE_MAT4 0x8F48 +#define GL_DOUBLE_MAT2x3 0x8F49 +#define GL_DOUBLE_MAT2x4 0x8F4A +#define GL_DOUBLE_MAT3x2 0x8F4B +#define GL_DOUBLE_MAT3x4 0x8F4C +#define GL_DOUBLE_MAT4x2 0x8F4D +#define GL_DOUBLE_MAT4x3 0x8F4E +#define GL_ACTIVE_SUBROUTINES 0x8DE5 +#define GL_ACTIVE_SUBROUTINE_UNIFORMS 0x8DE6 +#define GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS 0x8E47 +#define 
GL_ACTIVE_SUBROUTINE_MAX_LENGTH 0x8E48 +#define GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH 0x8E49 +#define GL_MAX_SUBROUTINES 0x8DE7 +#define GL_MAX_SUBROUTINE_UNIFORM_LOCATIONS 0x8DE8 +#define GL_NUM_COMPATIBLE_SUBROUTINES 0x8E4A +#define GL_COMPATIBLE_SUBROUTINES 0x8E4B +#define GL_PATCHES 0x000E +#define GL_PATCH_VERTICES 0x8E72 +#define GL_PATCH_DEFAULT_INNER_LEVEL 0x8E73 +#define GL_PATCH_DEFAULT_OUTER_LEVEL 0x8E74 +#define GL_TESS_CONTROL_OUTPUT_VERTICES 0x8E75 +#define GL_TESS_GEN_MODE 0x8E76 +#define GL_TESS_GEN_SPACING 0x8E77 +#define GL_TESS_GEN_VERTEX_ORDER 0x8E78 +#define GL_TESS_GEN_POINT_MODE 0x8E79 +#define GL_ISOLINES 0x8E7A +#define GL_FRACTIONAL_ODD 0x8E7B +#define GL_FRACTIONAL_EVEN 0x8E7C +#define GL_MAX_PATCH_VERTICES 0x8E7D +#define GL_MAX_TESS_GEN_LEVEL 0x8E7E +#define GL_MAX_TESS_CONTROL_UNIFORM_COMPONENTS 0x8E7F +#define GL_MAX_TESS_EVALUATION_UNIFORM_COMPONENTS 0x8E80 +#define GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS 0x8E81 +#define GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS 0x8E82 +#define GL_MAX_TESS_CONTROL_OUTPUT_COMPONENTS 0x8E83 +#define GL_MAX_TESS_PATCH_COMPONENTS 0x8E84 +#define GL_MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS 0x8E85 +#define GL_MAX_TESS_EVALUATION_OUTPUT_COMPONENTS 0x8E86 +#define GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS 0x8E89 +#define GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS 0x8E8A +#define GL_MAX_TESS_CONTROL_INPUT_COMPONENTS 0x886C +#define GL_MAX_TESS_EVALUATION_INPUT_COMPONENTS 0x886D +#define GL_MAX_COMBINED_TESS_CONTROL_UNIFORM_COMPONENTS 0x8E1E +#define GL_MAX_COMBINED_TESS_EVALUATION_UNIFORM_COMPONENTS 0x8E1F +#define GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER 0x84F0 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_EVALUATION_SHADER 0x84F1 +#define GL_TESS_EVALUATION_SHADER 0x8E87 +#define GL_TESS_CONTROL_SHADER 0x8E88 +#define GL_TRANSFORM_FEEDBACK 0x8E22 +#define GL_TRANSFORM_FEEDBACK_BUFFER_PAUSED 0x8E23 +#define GL_TRANSFORM_FEEDBACK_BUFFER_ACTIVE 0x8E24 +#define GL_TRANSFORM_FEEDBACK_BINDING 0x8E25 +#define 
GL_MAX_TRANSFORM_FEEDBACK_BUFFERS 0x8E70 +typedef void (APIENTRYP PFNGLMINSAMPLESHADINGPROC) (GLfloat value); +typedef void (APIENTRYP PFNGLBLENDEQUATIONIPROC) (GLuint buf, GLenum mode); +typedef void (APIENTRYP PFNGLBLENDEQUATIONSEPARATEIPROC) (GLuint buf, GLenum modeRGB, GLenum modeAlpha); +typedef void (APIENTRYP PFNGLBLENDFUNCIPROC) (GLuint buf, GLenum src, GLenum dst); +typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEIPROC) (GLuint buf, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +typedef void (APIENTRYP PFNGLDRAWARRAYSINDIRECTPROC) (GLenum mode, const void *indirect); +typedef void (APIENTRYP PFNGLDRAWELEMENTSINDIRECTPROC) (GLenum mode, GLenum type, const void *indirect); +typedef void (APIENTRYP PFNGLUNIFORM1DPROC) (GLint location, GLdouble x); +typedef void (APIENTRYP PFNGLUNIFORM2DPROC) (GLint location, GLdouble x, GLdouble y); +typedef void (APIENTRYP PFNGLUNIFORM3DPROC) (GLint location, GLdouble x, GLdouble y, GLdouble z); +typedef void (APIENTRYP PFNGLUNIFORM4DPROC) (GLint location, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (APIENTRYP PFNGLUNIFORM1DVPROC) (GLint location, GLsizei count, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORM2DVPROC) (GLint location, GLsizei count, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORM3DVPROC) (GLint location, GLsizei count, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORM4DVPROC) (GLint location, GLsizei count, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX2DVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX3DVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX4DVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X3DVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLdouble 
*value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X4DVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X2DVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X4DVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X2DVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X3DVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLGETUNIFORMDVPROC) (GLuint program, GLint location, GLdouble *params); +typedef GLint (APIENTRYP PFNGLGETSUBROUTINEUNIFORMLOCATIONPROC) (GLuint program, GLenum shadertype, const GLchar *name); +typedef GLuint (APIENTRYP PFNGLGETSUBROUTINEINDEXPROC) (GLuint program, GLenum shadertype, const GLchar *name); +typedef void (APIENTRYP PFNGLGETACTIVESUBROUTINEUNIFORMIVPROC) (GLuint program, GLenum shadertype, GLuint index, GLenum pname, GLint *values); +typedef void (APIENTRYP PFNGLGETACTIVESUBROUTINEUNIFORMNAMEPROC) (GLuint program, GLenum shadertype, GLuint index, GLsizei bufsize, GLsizei *length, GLchar *name); +typedef void (APIENTRYP PFNGLGETACTIVESUBROUTINENAMEPROC) (GLuint program, GLenum shadertype, GLuint index, GLsizei bufsize, GLsizei *length, GLchar *name); +typedef void (APIENTRYP PFNGLUNIFORMSUBROUTINESUIVPROC) (GLenum shadertype, GLsizei count, const GLuint *indices); +typedef void (APIENTRYP PFNGLGETUNIFORMSUBROUTINEUIVPROC) (GLenum shadertype, GLint location, GLuint *params); +typedef void (APIENTRYP PFNGLGETPROGRAMSTAGEIVPROC) (GLuint program, GLenum shadertype, GLenum pname, GLint *values); +typedef void (APIENTRYP PFNGLPATCHPARAMETERIPROC) (GLenum pname, GLint value); +typedef void (APIENTRYP PFNGLPATCHPARAMETERFVPROC) (GLenum pname, const GLfloat *values); +typedef void 
(APIENTRYP PFNGLBINDTRANSFORMFEEDBACKPROC) (GLenum target, GLuint id); +typedef void (APIENTRYP PFNGLDELETETRANSFORMFEEDBACKSPROC) (GLsizei n, const GLuint *ids); +typedef void (APIENTRYP PFNGLGENTRANSFORMFEEDBACKSPROC) (GLsizei n, GLuint *ids); +typedef GLboolean (APIENTRYP PFNGLISTRANSFORMFEEDBACKPROC) (GLuint id); +typedef void (APIENTRYP PFNGLPAUSETRANSFORMFEEDBACKPROC) (void); +typedef void (APIENTRYP PFNGLRESUMETRANSFORMFEEDBACKPROC) (void); +typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKPROC) (GLenum mode, GLuint id); +typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKSTREAMPROC) (GLenum mode, GLuint id, GLuint stream); +typedef void (APIENTRYP PFNGLBEGINQUERYINDEXEDPROC) (GLenum target, GLuint index, GLuint id); +typedef void (APIENTRYP PFNGLENDQUERYINDEXEDPROC) (GLenum target, GLuint index); +typedef void (APIENTRYP PFNGLGETQUERYINDEXEDIVPROC) (GLenum target, GLuint index, GLenum pname, GLint *params); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glMinSampleShading (GLfloat value); +GLAPI void APIENTRY glBlendEquationi (GLuint buf, GLenum mode); +GLAPI void APIENTRY glBlendEquationSeparatei (GLuint buf, GLenum modeRGB, GLenum modeAlpha); +GLAPI void APIENTRY glBlendFunci (GLuint buf, GLenum src, GLenum dst); +GLAPI void APIENTRY glBlendFuncSeparatei (GLuint buf, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +GLAPI void APIENTRY glDrawArraysIndirect (GLenum mode, const void *indirect); +GLAPI void APIENTRY glDrawElementsIndirect (GLenum mode, GLenum type, const void *indirect); +GLAPI void APIENTRY glUniform1d (GLint location, GLdouble x); +GLAPI void APIENTRY glUniform2d (GLint location, GLdouble x, GLdouble y); +GLAPI void APIENTRY glUniform3d (GLint location, GLdouble x, GLdouble y, GLdouble z); +GLAPI void APIENTRY glUniform4d (GLint location, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +GLAPI void APIENTRY glUniform1dv (GLint location, GLsizei count, const GLdouble *value); +GLAPI void APIENTRY glUniform2dv (GLint 
location, GLsizei count, const GLdouble *value); +GLAPI void APIENTRY glUniform3dv (GLint location, GLsizei count, const GLdouble *value); +GLAPI void APIENTRY glUniform4dv (GLint location, GLsizei count, const GLdouble *value); +GLAPI void APIENTRY glUniformMatrix2dv (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glUniformMatrix3dv (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glUniformMatrix4dv (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glUniformMatrix2x3dv (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glUniformMatrix2x4dv (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glUniformMatrix3x2dv (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glUniformMatrix3x4dv (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glUniformMatrix4x2dv (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glUniformMatrix4x3dv (GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glGetUniformdv (GLuint program, GLint location, GLdouble *params); +GLAPI GLint APIENTRY glGetSubroutineUniformLocation (GLuint program, GLenum shadertype, const GLchar *name); +GLAPI GLuint APIENTRY glGetSubroutineIndex (GLuint program, GLenum shadertype, const GLchar *name); +GLAPI void APIENTRY glGetActiveSubroutineUniformiv (GLuint program, GLenum shadertype, GLuint index, GLenum pname, GLint *values); +GLAPI void APIENTRY glGetActiveSubroutineUniformName (GLuint program, GLenum shadertype, GLuint index, GLsizei bufsize, GLsizei *length, GLchar *name); +GLAPI void APIENTRY glGetActiveSubroutineName (GLuint program, GLenum shadertype, GLuint index, GLsizei bufsize, GLsizei 
*length, GLchar *name); +GLAPI void APIENTRY glUniformSubroutinesuiv (GLenum shadertype, GLsizei count, const GLuint *indices); +GLAPI void APIENTRY glGetUniformSubroutineuiv (GLenum shadertype, GLint location, GLuint *params); +GLAPI void APIENTRY glGetProgramStageiv (GLuint program, GLenum shadertype, GLenum pname, GLint *values); +GLAPI void APIENTRY glPatchParameteri (GLenum pname, GLint value); +GLAPI void APIENTRY glPatchParameterfv (GLenum pname, const GLfloat *values); +GLAPI void APIENTRY glBindTransformFeedback (GLenum target, GLuint id); +GLAPI void APIENTRY glDeleteTransformFeedbacks (GLsizei n, const GLuint *ids); +GLAPI void APIENTRY glGenTransformFeedbacks (GLsizei n, GLuint *ids); +GLAPI GLboolean APIENTRY glIsTransformFeedback (GLuint id); +GLAPI void APIENTRY glPauseTransformFeedback (void); +GLAPI void APIENTRY glResumeTransformFeedback (void); +GLAPI void APIENTRY glDrawTransformFeedback (GLenum mode, GLuint id); +GLAPI void APIENTRY glDrawTransformFeedbackStream (GLenum mode, GLuint id, GLuint stream); +GLAPI void APIENTRY glBeginQueryIndexed (GLenum target, GLuint index, GLuint id); +GLAPI void APIENTRY glEndQueryIndexed (GLenum target, GLuint index); +GLAPI void APIENTRY glGetQueryIndexediv (GLenum target, GLuint index, GLenum pname, GLint *params); +#endif +#endif /* GL_VERSION_4_0 */ + +#ifndef GL_VERSION_4_1 +#define GL_VERSION_4_1 1 +#define GL_FIXED 0x140C +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B +#define GL_LOW_FLOAT 0x8DF0 +#define GL_MEDIUM_FLOAT 0x8DF1 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_LOW_INT 0x8DF3 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_HIGH_INT 0x8DF5 +#define GL_SHADER_COMPILER 0x8DFA +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 +#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_RGB565 0x8D62 +#define 
GL_PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257 +#define GL_PROGRAM_BINARY_LENGTH 0x8741 +#define GL_NUM_PROGRAM_BINARY_FORMATS 0x87FE +#define GL_PROGRAM_BINARY_FORMATS 0x87FF +#define GL_VERTEX_SHADER_BIT 0x00000001 +#define GL_FRAGMENT_SHADER_BIT 0x00000002 +#define GL_GEOMETRY_SHADER_BIT 0x00000004 +#define GL_TESS_CONTROL_SHADER_BIT 0x00000008 +#define GL_TESS_EVALUATION_SHADER_BIT 0x00000010 +#define GL_ALL_SHADER_BITS 0xFFFFFFFF +#define GL_PROGRAM_SEPARABLE 0x8258 +#define GL_ACTIVE_PROGRAM 0x8259 +#define GL_PROGRAM_PIPELINE_BINDING 0x825A +#define GL_MAX_VIEWPORTS 0x825B +#define GL_VIEWPORT_SUBPIXEL_BITS 0x825C +#define GL_VIEWPORT_BOUNDS_RANGE 0x825D +#define GL_LAYER_PROVOKING_VERTEX 0x825E +#define GL_VIEWPORT_INDEX_PROVOKING_VERTEX 0x825F +#define GL_UNDEFINED_VERTEX 0x8260 +typedef void (APIENTRYP PFNGLRELEASESHADERCOMPILERPROC) (void); +typedef void (APIENTRYP PFNGLSHADERBINARYPROC) (GLsizei count, const GLuint *shaders, GLenum binaryformat, const void *binary, GLsizei length); +typedef void (APIENTRYP PFNGLGETSHADERPRECISIONFORMATPROC) (GLenum shadertype, GLenum precisiontype, GLint *range, GLint *precision); +typedef void (APIENTRYP PFNGLDEPTHRANGEFPROC) (GLfloat n, GLfloat f); +typedef void (APIENTRYP PFNGLCLEARDEPTHFPROC) (GLfloat d); +typedef void (APIENTRYP PFNGLGETPROGRAMBINARYPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, void *binary); +typedef void (APIENTRYP PFNGLPROGRAMBINARYPROC) (GLuint program, GLenum binaryFormat, const void *binary, GLsizei length); +typedef void (APIENTRYP PFNGLPROGRAMPARAMETERIPROC) (GLuint program, GLenum pname, GLint value); +typedef void (APIENTRYP PFNGLUSEPROGRAMSTAGESPROC) (GLuint pipeline, GLbitfield stages, GLuint program); +typedef void (APIENTRYP PFNGLACTIVESHADERPROGRAMPROC) (GLuint pipeline, GLuint program); +typedef GLuint (APIENTRYP PFNGLCREATESHADERPROGRAMVPROC) (GLenum type, GLsizei count, const GLchar *const*strings); +typedef void (APIENTRYP PFNGLBINDPROGRAMPIPELINEPROC) 
(GLuint pipeline); +typedef void (APIENTRYP PFNGLDELETEPROGRAMPIPELINESPROC) (GLsizei n, const GLuint *pipelines); +typedef void (APIENTRYP PFNGLGENPROGRAMPIPELINESPROC) (GLsizei n, GLuint *pipelines); +typedef GLboolean (APIENTRYP PFNGLISPROGRAMPIPELINEPROC) (GLuint pipeline); +typedef void (APIENTRYP PFNGLGETPROGRAMPIPELINEIVPROC) (GLuint pipeline, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1IPROC) (GLuint program, GLint location, GLint v0); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1IVPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1FPROC) (GLuint program, GLint location, GLfloat v0); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1FVPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1DPROC) (GLuint program, GLint location, GLdouble v0); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1DVPROC) (GLuint program, GLint location, GLsizei count, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1UIPROC) (GLuint program, GLint location, GLuint v0); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1UIVPROC) (GLuint program, GLint location, GLsizei count, const GLuint *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2IPROC) (GLuint program, GLint location, GLint v0, GLint v1); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2IVPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2FPROC) (GLuint program, GLint location, GLfloat v0, GLfloat v1); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2FVPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2DPROC) (GLuint program, GLint location, GLdouble v0, GLdouble v1); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2DVPROC) (GLuint program, GLint location, GLsizei count, const GLdouble *value); +typedef void (APIENTRYP 
PFNGLPROGRAMUNIFORM2UIPROC) (GLuint program, GLint location, GLuint v0, GLuint v1); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2UIVPROC) (GLuint program, GLint location, GLsizei count, const GLuint *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3IPROC) (GLuint program, GLint location, GLint v0, GLint v1, GLint v2); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3IVPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3FPROC) (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3FVPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3DPROC) (GLuint program, GLint location, GLdouble v0, GLdouble v1, GLdouble v2); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3DVPROC) (GLuint program, GLint location, GLsizei count, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3UIPROC) (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3UIVPROC) (GLuint program, GLint location, GLsizei count, const GLuint *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4IPROC) (GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4IVPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4FPROC) (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4FVPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4DPROC) (GLuint program, GLint location, GLdouble v0, GLdouble v1, GLdouble v2, GLdouble v3); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4DVPROC) (GLuint program, GLint location, GLsizei count, const GLdouble *value); +typedef void (APIENTRYP 
PFNGLPROGRAMUNIFORM4UIPROC) (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4UIVPROC) (GLuint program, GLint location, GLsizei count, const GLuint *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2FVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3FVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4FVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2DVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3DVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4DVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X3FVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X2FVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X4FVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X2FVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X4FVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X3FVPROC) (GLuint program, GLint location, 
GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X3DVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X2DVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X4DVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X2DVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X4DVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X3DVPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +typedef void (APIENTRYP PFNGLVALIDATEPROGRAMPIPELINEPROC) (GLuint pipeline); +typedef void (APIENTRYP PFNGLGETPROGRAMPIPELINEINFOLOGPROC) (GLuint pipeline, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL1DPROC) (GLuint index, GLdouble x); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL2DPROC) (GLuint index, GLdouble x, GLdouble y); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL3DPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL4DPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL1DVPROC) (GLuint index, const GLdouble *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL2DVPROC) (GLuint index, const GLdouble *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL3DVPROC) (GLuint index, const GLdouble *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL4DVPROC) (GLuint index, const GLdouble *v); +typedef void (APIENTRYP PFNGLVERTEXATTRIBLPOINTERPROC) (GLuint index, 
GLint size, GLenum type, GLsizei stride, const void *pointer); +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBLDVPROC) (GLuint index, GLenum pname, GLdouble *params); +typedef void (APIENTRYP PFNGLVIEWPORTARRAYVPROC) (GLuint first, GLsizei count, const GLfloat *v); +typedef void (APIENTRYP PFNGLVIEWPORTINDEXEDFPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat w, GLfloat h); +typedef void (APIENTRYP PFNGLVIEWPORTINDEXEDFVPROC) (GLuint index, const GLfloat *v); +typedef void (APIENTRYP PFNGLSCISSORARRAYVPROC) (GLuint first, GLsizei count, const GLint *v); +typedef void (APIENTRYP PFNGLSCISSORINDEXEDPROC) (GLuint index, GLint left, GLint bottom, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLSCISSORINDEXEDVPROC) (GLuint index, const GLint *v); +typedef void (APIENTRYP PFNGLDEPTHRANGEARRAYVPROC) (GLuint first, GLsizei count, const GLdouble *v); +typedef void (APIENTRYP PFNGLDEPTHRANGEINDEXEDPROC) (GLuint index, GLdouble n, GLdouble f); +typedef void (APIENTRYP PFNGLGETFLOATI_VPROC) (GLenum target, GLuint index, GLfloat *data); +typedef void (APIENTRYP PFNGLGETDOUBLEI_VPROC) (GLenum target, GLuint index, GLdouble *data); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glReleaseShaderCompiler (void); +GLAPI void APIENTRY glShaderBinary (GLsizei count, const GLuint *shaders, GLenum binaryformat, const void *binary, GLsizei length); +GLAPI void APIENTRY glGetShaderPrecisionFormat (GLenum shadertype, GLenum precisiontype, GLint *range, GLint *precision); +GLAPI void APIENTRY glDepthRangef (GLfloat n, GLfloat f); +GLAPI void APIENTRY glClearDepthf (GLfloat d); +GLAPI void APIENTRY glGetProgramBinary (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, void *binary); +GLAPI void APIENTRY glProgramBinary (GLuint program, GLenum binaryFormat, const void *binary, GLsizei length); +GLAPI void APIENTRY glProgramParameteri (GLuint program, GLenum pname, GLint value); +GLAPI void APIENTRY glUseProgramStages (GLuint pipeline, GLbitfield stages, 
GLuint program); +GLAPI void APIENTRY glActiveShaderProgram (GLuint pipeline, GLuint program); +GLAPI GLuint APIENTRY glCreateShaderProgramv (GLenum type, GLsizei count, const GLchar *const*strings); +GLAPI void APIENTRY glBindProgramPipeline (GLuint pipeline); +GLAPI void APIENTRY glDeleteProgramPipelines (GLsizei n, const GLuint *pipelines); +GLAPI void APIENTRY glGenProgramPipelines (GLsizei n, GLuint *pipelines); +GLAPI GLboolean APIENTRY glIsProgramPipeline (GLuint pipeline); +GLAPI void APIENTRY glGetProgramPipelineiv (GLuint pipeline, GLenum pname, GLint *params); +GLAPI void APIENTRY glProgramUniform1i (GLuint program, GLint location, GLint v0); +GLAPI void APIENTRY glProgramUniform1iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GLAPI void APIENTRY glProgramUniform1f (GLuint program, GLint location, GLfloat v0); +GLAPI void APIENTRY glProgramUniform1fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GLAPI void APIENTRY glProgramUniform1d (GLuint program, GLint location, GLdouble v0); +GLAPI void APIENTRY glProgramUniform1dv (GLuint program, GLint location, GLsizei count, const GLdouble *value); +GLAPI void APIENTRY glProgramUniform1ui (GLuint program, GLint location, GLuint v0); +GLAPI void APIENTRY glProgramUniform1uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GLAPI void APIENTRY glProgramUniform2i (GLuint program, GLint location, GLint v0, GLint v1); +GLAPI void APIENTRY glProgramUniform2iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GLAPI void APIENTRY glProgramUniform2f (GLuint program, GLint location, GLfloat v0, GLfloat v1); +GLAPI void APIENTRY glProgramUniform2fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GLAPI void APIENTRY glProgramUniform2d (GLuint program, GLint location, GLdouble v0, GLdouble v1); +GLAPI void APIENTRY glProgramUniform2dv (GLuint program, GLint location, GLsizei count, const GLdouble *value); 
+GLAPI void APIENTRY glProgramUniform2ui (GLuint program, GLint location, GLuint v0, GLuint v1); +GLAPI void APIENTRY glProgramUniform2uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GLAPI void APIENTRY glProgramUniform3i (GLuint program, GLint location, GLint v0, GLint v1, GLint v2); +GLAPI void APIENTRY glProgramUniform3iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GLAPI void APIENTRY glProgramUniform3f (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +GLAPI void APIENTRY glProgramUniform3fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GLAPI void APIENTRY glProgramUniform3d (GLuint program, GLint location, GLdouble v0, GLdouble v1, GLdouble v2); +GLAPI void APIENTRY glProgramUniform3dv (GLuint program, GLint location, GLsizei count, const GLdouble *value); +GLAPI void APIENTRY glProgramUniform3ui (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2); +GLAPI void APIENTRY glProgramUniform3uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GLAPI void APIENTRY glProgramUniform4i (GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +GLAPI void APIENTRY glProgramUniform4iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GLAPI void APIENTRY glProgramUniform4f (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +GLAPI void APIENTRY glProgramUniform4fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GLAPI void APIENTRY glProgramUniform4d (GLuint program, GLint location, GLdouble v0, GLdouble v1, GLdouble v2, GLdouble v3); +GLAPI void APIENTRY glProgramUniform4dv (GLuint program, GLint location, GLsizei count, const GLdouble *value); +GLAPI void APIENTRY glProgramUniform4ui (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +GLAPI void APIENTRY glProgramUniform4uiv (GLuint program, GLint location, GLsizei count, 
const GLuint *value); +GLAPI void APIENTRY glProgramUniformMatrix2fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glProgramUniformMatrix3fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glProgramUniformMatrix4fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glProgramUniformMatrix2dv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glProgramUniformMatrix3dv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glProgramUniformMatrix4dv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glProgramUniformMatrix2x3fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glProgramUniformMatrix3x2fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glProgramUniformMatrix2x4fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glProgramUniformMatrix4x2fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glProgramUniformMatrix3x4fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glProgramUniformMatrix4x3fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI void APIENTRY glProgramUniformMatrix2x3dv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glProgramUniformMatrix3x2dv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI 
void APIENTRY glProgramUniformMatrix2x4dv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glProgramUniformMatrix4x2dv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glProgramUniformMatrix3x4dv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glProgramUniformMatrix4x3dv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); +GLAPI void APIENTRY glValidateProgramPipeline (GLuint pipeline); +GLAPI void APIENTRY glGetProgramPipelineInfoLog (GLuint pipeline, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GLAPI void APIENTRY glVertexAttribL1d (GLuint index, GLdouble x); +GLAPI void APIENTRY glVertexAttribL2d (GLuint index, GLdouble x, GLdouble y); +GLAPI void APIENTRY glVertexAttribL3d (GLuint index, GLdouble x, GLdouble y, GLdouble z); +GLAPI void APIENTRY glVertexAttribL4d (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +GLAPI void APIENTRY glVertexAttribL1dv (GLuint index, const GLdouble *v); +GLAPI void APIENTRY glVertexAttribL2dv (GLuint index, const GLdouble *v); +GLAPI void APIENTRY glVertexAttribL3dv (GLuint index, const GLdouble *v); +GLAPI void APIENTRY glVertexAttribL4dv (GLuint index, const GLdouble *v); +GLAPI void APIENTRY glVertexAttribLPointer (GLuint index, GLint size, GLenum type, GLsizei stride, const void *pointer); +GLAPI void APIENTRY glGetVertexAttribLdv (GLuint index, GLenum pname, GLdouble *params); +GLAPI void APIENTRY glViewportArrayv (GLuint first, GLsizei count, const GLfloat *v); +GLAPI void APIENTRY glViewportIndexedf (GLuint index, GLfloat x, GLfloat y, GLfloat w, GLfloat h); +GLAPI void APIENTRY glViewportIndexedfv (GLuint index, const GLfloat *v); +GLAPI void APIENTRY glScissorArrayv (GLuint first, GLsizei count, const GLint *v); +GLAPI void APIENTRY glScissorIndexed (GLuint index, 
GLint left, GLint bottom, GLsizei width, GLsizei height); +GLAPI void APIENTRY glScissorIndexedv (GLuint index, const GLint *v); +GLAPI void APIENTRY glDepthRangeArrayv (GLuint first, GLsizei count, const GLdouble *v); +GLAPI void APIENTRY glDepthRangeIndexed (GLuint index, GLdouble n, GLdouble f); +GLAPI void APIENTRY glGetFloati_v (GLenum target, GLuint index, GLfloat *data); +GLAPI void APIENTRY glGetDoublei_v (GLenum target, GLuint index, GLdouble *data); +#endif +#endif /* GL_VERSION_4_1 */ + +#ifndef GL_VERSION_4_2 +#define GL_VERSION_4_2 1 +#define GL_COPY_READ_BUFFER_BINDING 0x8F36 +#define GL_COPY_WRITE_BUFFER_BINDING 0x8F37 +#define GL_TRANSFORM_FEEDBACK_ACTIVE 0x8E24 +#define GL_TRANSFORM_FEEDBACK_PAUSED 0x8E23 +#define GL_UNPACK_COMPRESSED_BLOCK_WIDTH 0x9127 +#define GL_UNPACK_COMPRESSED_BLOCK_HEIGHT 0x9128 +#define GL_UNPACK_COMPRESSED_BLOCK_DEPTH 0x9129 +#define GL_UNPACK_COMPRESSED_BLOCK_SIZE 0x912A +#define GL_PACK_COMPRESSED_BLOCK_WIDTH 0x912B +#define GL_PACK_COMPRESSED_BLOCK_HEIGHT 0x912C +#define GL_PACK_COMPRESSED_BLOCK_DEPTH 0x912D +#define GL_PACK_COMPRESSED_BLOCK_SIZE 0x912E +#define GL_NUM_SAMPLE_COUNTS 0x9380 +#define GL_MIN_MAP_BUFFER_ALIGNMENT 0x90BC +#define GL_ATOMIC_COUNTER_BUFFER 0x92C0 +#define GL_ATOMIC_COUNTER_BUFFER_BINDING 0x92C1 +#define GL_ATOMIC_COUNTER_BUFFER_START 0x92C2 +#define GL_ATOMIC_COUNTER_BUFFER_SIZE 0x92C3 +#define GL_ATOMIC_COUNTER_BUFFER_DATA_SIZE 0x92C4 +#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTERS 0x92C5 +#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTER_INDICES 0x92C6 +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_VERTEX_SHADER 0x92C7 +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER 0x92C8 +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER 0x92C9 +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER 0x92CA +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_FRAGMENT_SHADER 0x92CB +#define GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS 0x92CC +#define 
GL_MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS 0x92CD +#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS 0x92CE +#define GL_MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS 0x92CF +#define GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS 0x92D0 +#define GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS 0x92D1 +#define GL_MAX_VERTEX_ATOMIC_COUNTERS 0x92D2 +#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTERS 0x92D3 +#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTERS 0x92D4 +#define GL_MAX_GEOMETRY_ATOMIC_COUNTERS 0x92D5 +#define GL_MAX_FRAGMENT_ATOMIC_COUNTERS 0x92D6 +#define GL_MAX_COMBINED_ATOMIC_COUNTERS 0x92D7 +#define GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE 0x92D8 +#define GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS 0x92DC +#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9 +#define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA +#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB +#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001 +#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002 +#define GL_UNIFORM_BARRIER_BIT 0x00000004 +#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008 +#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020 +#define GL_COMMAND_BARRIER_BIT 0x00000040 +#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080 +#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100 +#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200 +#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400 +#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800 +#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000 +#define GL_ALL_BARRIER_BITS 0xFFFFFFFF +#define GL_MAX_IMAGE_UNITS 0x8F38 +#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39 +#define GL_IMAGE_BINDING_NAME 0x8F3A +#define GL_IMAGE_BINDING_LEVEL 0x8F3B +#define GL_IMAGE_BINDING_LAYERED 0x8F3C +#define GL_IMAGE_BINDING_LAYER 0x8F3D +#define GL_IMAGE_BINDING_ACCESS 0x8F3E +#define GL_IMAGE_1D 0x904C +#define GL_IMAGE_2D 0x904D +#define GL_IMAGE_3D 0x904E +#define GL_IMAGE_2D_RECT 0x904F +#define GL_IMAGE_CUBE 0x9050 +#define GL_IMAGE_BUFFER 0x9051 +#define GL_IMAGE_1D_ARRAY 0x9052 +#define GL_IMAGE_2D_ARRAY 
0x9053 +#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054 +#define GL_IMAGE_2D_MULTISAMPLE 0x9055 +#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056 +#define GL_INT_IMAGE_1D 0x9057 +#define GL_INT_IMAGE_2D 0x9058 +#define GL_INT_IMAGE_3D 0x9059 +#define GL_INT_IMAGE_2D_RECT 0x905A +#define GL_INT_IMAGE_CUBE 0x905B +#define GL_INT_IMAGE_BUFFER 0x905C +#define GL_INT_IMAGE_1D_ARRAY 0x905D +#define GL_INT_IMAGE_2D_ARRAY 0x905E +#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F +#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060 +#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061 +#define GL_UNSIGNED_INT_IMAGE_1D 0x9062 +#define GL_UNSIGNED_INT_IMAGE_2D 0x9063 +#define GL_UNSIGNED_INT_IMAGE_3D 0x9064 +#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065 +#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066 +#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067 +#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068 +#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069 +#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A +#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B +#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C +#define GL_MAX_IMAGE_SAMPLES 0x906D +#define GL_IMAGE_BINDING_FORMAT 0x906E +#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7 +#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8 +#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9 +#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA +#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB +#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC +#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD +#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE +#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF +#define GL_COMPRESSED_RGBA_BPTC_UNORM 0x8E8C +#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM 0x8E8D +#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT 0x8E8E +#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT 0x8E8F +#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F +typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEPROC) (GLenum mode, GLint first, GLsizei count, GLsizei instancecount, GLuint baseinstance); 
+typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLuint baseinstance); +typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLint basevertex, GLuint baseinstance); +typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params); +typedef void (APIENTRYP PFNGLGETACTIVEATOMICCOUNTERBUFFERIVPROC) (GLuint program, GLuint bufferIndex, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format); +typedef void (APIENTRYP PFNGLMEMORYBARRIERPROC) (GLbitfield barriers); +typedef void (APIENTRYP PFNGLTEXSTORAGE1DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (APIENTRYP PFNGLTEXSTORAGE2DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLTEXSTORAGE3DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDPROC) (GLenum mode, GLuint id, GLsizei instancecount); +typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC) (GLenum mode, GLuint id, GLuint stream, GLsizei instancecount); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glDrawArraysInstancedBaseInstance (GLenum mode, GLint first, GLsizei count, GLsizei instancecount, GLuint baseinstance); +GLAPI void APIENTRY glDrawElementsInstancedBaseInstance (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLuint baseinstance); +GLAPI void APIENTRY glDrawElementsInstancedBaseVertexBaseInstance (GLenum mode, GLsizei count, GLenum type, const void 
*indices, GLsizei instancecount, GLint basevertex, GLuint baseinstance); +GLAPI void APIENTRY glGetInternalformativ (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params); +GLAPI void APIENTRY glGetActiveAtomicCounterBufferiv (GLuint program, GLuint bufferIndex, GLenum pname, GLint *params); +GLAPI void APIENTRY glBindImageTexture (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format); +GLAPI void APIENTRY glMemoryBarrier (GLbitfield barriers); +GLAPI void APIENTRY glTexStorage1D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GLAPI void APIENTRY glTexStorage2D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glTexStorage3D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GLAPI void APIENTRY glDrawTransformFeedbackInstanced (GLenum mode, GLuint id, GLsizei instancecount); +GLAPI void APIENTRY glDrawTransformFeedbackStreamInstanced (GLenum mode, GLuint id, GLuint stream, GLsizei instancecount); +#endif +#endif /* GL_VERSION_4_2 */ + +#ifndef GL_VERSION_4_3 +#define GL_VERSION_4_3 1 +typedef void (APIENTRY *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +#define GL_NUM_SHADING_LANGUAGE_VERSIONS 0x82E9 +#define GL_VERTEX_ATTRIB_ARRAY_LONG 0x874E +#define GL_COMPRESSED_RGB8_ETC2 0x9274 +#define GL_COMPRESSED_SRGB8_ETC2 0x9275 +#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276 +#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277 +#define GL_COMPRESSED_RGBA8_ETC2_EAC 0x9278 +#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279 +#define GL_COMPRESSED_R11_EAC 0x9270 +#define GL_COMPRESSED_SIGNED_R11_EAC 0x9271 +#define GL_COMPRESSED_RG11_EAC 0x9272 +#define GL_COMPRESSED_SIGNED_RG11_EAC 0x9273 +#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69 +#define 
GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A +#define GL_MAX_ELEMENT_INDEX 0x8D6B +#define GL_COMPUTE_SHADER 0x91B9 +#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB +#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC +#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD +#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262 +#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263 +#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264 +#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265 +#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266 +#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB +#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE +#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF +#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC +#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED +#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE +#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF +#define GL_COMPUTE_SHADER_BIT 0x00000020 +#define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242 +#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243 +#define GL_DEBUG_CALLBACK_FUNCTION 0x8244 +#define GL_DEBUG_CALLBACK_USER_PARAM 0x8245 +#define GL_DEBUG_SOURCE_API 0x8246 +#define GL_DEBUG_SOURCE_WINDOW_SYSTEM 0x8247 +#define GL_DEBUG_SOURCE_SHADER_COMPILER 0x8248 +#define GL_DEBUG_SOURCE_THIRD_PARTY 0x8249 +#define GL_DEBUG_SOURCE_APPLICATION 0x824A +#define GL_DEBUG_SOURCE_OTHER 0x824B +#define GL_DEBUG_TYPE_ERROR 0x824C +#define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR 0x824D +#define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR 0x824E +#define GL_DEBUG_TYPE_PORTABILITY 0x824F +#define GL_DEBUG_TYPE_PERFORMANCE 0x8250 +#define GL_DEBUG_TYPE_OTHER 0x8251 +#define GL_MAX_DEBUG_MESSAGE_LENGTH 0x9143 +#define GL_MAX_DEBUG_LOGGED_MESSAGES 0x9144 +#define GL_DEBUG_LOGGED_MESSAGES 0x9145 +#define GL_DEBUG_SEVERITY_HIGH 0x9146 +#define GL_DEBUG_SEVERITY_MEDIUM 0x9147 +#define GL_DEBUG_SEVERITY_LOW 0x9148 +#define GL_DEBUG_TYPE_MARKER 0x8268 +#define GL_DEBUG_TYPE_PUSH_GROUP 0x8269 
+#define GL_DEBUG_TYPE_POP_GROUP 0x826A +#define GL_DEBUG_SEVERITY_NOTIFICATION 0x826B +#define GL_MAX_DEBUG_GROUP_STACK_DEPTH 0x826C +#define GL_DEBUG_GROUP_STACK_DEPTH 0x826D +#define GL_BUFFER 0x82E0 +#define GL_SHADER 0x82E1 +#define GL_PROGRAM 0x82E2 +#define GL_QUERY 0x82E3 +#define GL_PROGRAM_PIPELINE 0x82E4 +#define GL_SAMPLER 0x82E6 +#define GL_MAX_LABEL_LENGTH 0x82E8 +#define GL_DEBUG_OUTPUT 0x92E0 +#define GL_CONTEXT_FLAG_DEBUG_BIT 0x00000002 +#define GL_MAX_UNIFORM_LOCATIONS 0x826E +#define GL_FRAMEBUFFER_DEFAULT_WIDTH 0x9310 +#define GL_FRAMEBUFFER_DEFAULT_HEIGHT 0x9311 +#define GL_FRAMEBUFFER_DEFAULT_LAYERS 0x9312 +#define GL_FRAMEBUFFER_DEFAULT_SAMPLES 0x9313 +#define GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS 0x9314 +#define GL_MAX_FRAMEBUFFER_WIDTH 0x9315 +#define GL_MAX_FRAMEBUFFER_HEIGHT 0x9316 +#define GL_MAX_FRAMEBUFFER_LAYERS 0x9317 +#define GL_MAX_FRAMEBUFFER_SAMPLES 0x9318 +#define GL_INTERNALFORMAT_SUPPORTED 0x826F +#define GL_INTERNALFORMAT_PREFERRED 0x8270 +#define GL_INTERNALFORMAT_RED_SIZE 0x8271 +#define GL_INTERNALFORMAT_GREEN_SIZE 0x8272 +#define GL_INTERNALFORMAT_BLUE_SIZE 0x8273 +#define GL_INTERNALFORMAT_ALPHA_SIZE 0x8274 +#define GL_INTERNALFORMAT_DEPTH_SIZE 0x8275 +#define GL_INTERNALFORMAT_STENCIL_SIZE 0x8276 +#define GL_INTERNALFORMAT_SHARED_SIZE 0x8277 +#define GL_INTERNALFORMAT_RED_TYPE 0x8278 +#define GL_INTERNALFORMAT_GREEN_TYPE 0x8279 +#define GL_INTERNALFORMAT_BLUE_TYPE 0x827A +#define GL_INTERNALFORMAT_ALPHA_TYPE 0x827B +#define GL_INTERNALFORMAT_DEPTH_TYPE 0x827C +#define GL_INTERNALFORMAT_STENCIL_TYPE 0x827D +#define GL_MAX_WIDTH 0x827E +#define GL_MAX_HEIGHT 0x827F +#define GL_MAX_DEPTH 0x8280 +#define GL_MAX_LAYERS 0x8281 +#define GL_MAX_COMBINED_DIMENSIONS 0x8282 +#define GL_COLOR_COMPONENTS 0x8283 +#define GL_DEPTH_COMPONENTS 0x8284 +#define GL_STENCIL_COMPONENTS 0x8285 +#define GL_COLOR_RENDERABLE 0x8286 +#define GL_DEPTH_RENDERABLE 0x8287 +#define GL_STENCIL_RENDERABLE 0x8288 +#define 
GL_FRAMEBUFFER_RENDERABLE 0x8289 +#define GL_FRAMEBUFFER_RENDERABLE_LAYERED 0x828A +#define GL_FRAMEBUFFER_BLEND 0x828B +#define GL_READ_PIXELS 0x828C +#define GL_READ_PIXELS_FORMAT 0x828D +#define GL_READ_PIXELS_TYPE 0x828E +#define GL_TEXTURE_IMAGE_FORMAT 0x828F +#define GL_TEXTURE_IMAGE_TYPE 0x8290 +#define GL_GET_TEXTURE_IMAGE_FORMAT 0x8291 +#define GL_GET_TEXTURE_IMAGE_TYPE 0x8292 +#define GL_MIPMAP 0x8293 +#define GL_MANUAL_GENERATE_MIPMAP 0x8294 +#define GL_AUTO_GENERATE_MIPMAP 0x8295 +#define GL_COLOR_ENCODING 0x8296 +#define GL_SRGB_READ 0x8297 +#define GL_SRGB_WRITE 0x8298 +#define GL_FILTER 0x829A +#define GL_VERTEX_TEXTURE 0x829B +#define GL_TESS_CONTROL_TEXTURE 0x829C +#define GL_TESS_EVALUATION_TEXTURE 0x829D +#define GL_GEOMETRY_TEXTURE 0x829E +#define GL_FRAGMENT_TEXTURE 0x829F +#define GL_COMPUTE_TEXTURE 0x82A0 +#define GL_TEXTURE_SHADOW 0x82A1 +#define GL_TEXTURE_GATHER 0x82A2 +#define GL_TEXTURE_GATHER_SHADOW 0x82A3 +#define GL_SHADER_IMAGE_LOAD 0x82A4 +#define GL_SHADER_IMAGE_STORE 0x82A5 +#define GL_SHADER_IMAGE_ATOMIC 0x82A6 +#define GL_IMAGE_TEXEL_SIZE 0x82A7 +#define GL_IMAGE_COMPATIBILITY_CLASS 0x82A8 +#define GL_IMAGE_PIXEL_FORMAT 0x82A9 +#define GL_IMAGE_PIXEL_TYPE 0x82AA +#define GL_SIMULTANEOUS_TEXTURE_AND_DEPTH_TEST 0x82AC +#define GL_SIMULTANEOUS_TEXTURE_AND_STENCIL_TEST 0x82AD +#define GL_SIMULTANEOUS_TEXTURE_AND_DEPTH_WRITE 0x82AE +#define GL_SIMULTANEOUS_TEXTURE_AND_STENCIL_WRITE 0x82AF +#define GL_TEXTURE_COMPRESSED_BLOCK_WIDTH 0x82B1 +#define GL_TEXTURE_COMPRESSED_BLOCK_HEIGHT 0x82B2 +#define GL_TEXTURE_COMPRESSED_BLOCK_SIZE 0x82B3 +#define GL_CLEAR_BUFFER 0x82B4 +#define GL_TEXTURE_VIEW 0x82B5 +#define GL_VIEW_COMPATIBILITY_CLASS 0x82B6 +#define GL_FULL_SUPPORT 0x82B7 +#define GL_CAVEAT_SUPPORT 0x82B8 +#define GL_IMAGE_CLASS_4_X_32 0x82B9 +#define GL_IMAGE_CLASS_2_X_32 0x82BA +#define GL_IMAGE_CLASS_1_X_32 0x82BB +#define GL_IMAGE_CLASS_4_X_16 0x82BC +#define GL_IMAGE_CLASS_2_X_16 0x82BD +#define GL_IMAGE_CLASS_1_X_16 0x82BE 
+#define GL_IMAGE_CLASS_4_X_8 0x82BF +#define GL_IMAGE_CLASS_2_X_8 0x82C0 +#define GL_IMAGE_CLASS_1_X_8 0x82C1 +#define GL_IMAGE_CLASS_11_11_10 0x82C2 +#define GL_IMAGE_CLASS_10_10_10_2 0x82C3 +#define GL_VIEW_CLASS_128_BITS 0x82C4 +#define GL_VIEW_CLASS_96_BITS 0x82C5 +#define GL_VIEW_CLASS_64_BITS 0x82C6 +#define GL_VIEW_CLASS_48_BITS 0x82C7 +#define GL_VIEW_CLASS_32_BITS 0x82C8 +#define GL_VIEW_CLASS_24_BITS 0x82C9 +#define GL_VIEW_CLASS_16_BITS 0x82CA +#define GL_VIEW_CLASS_8_BITS 0x82CB +#define GL_VIEW_CLASS_S3TC_DXT1_RGB 0x82CC +#define GL_VIEW_CLASS_S3TC_DXT1_RGBA 0x82CD +#define GL_VIEW_CLASS_S3TC_DXT3_RGBA 0x82CE +#define GL_VIEW_CLASS_S3TC_DXT5_RGBA 0x82CF +#define GL_VIEW_CLASS_RGTC1_RED 0x82D0 +#define GL_VIEW_CLASS_RGTC2_RG 0x82D1 +#define GL_VIEW_CLASS_BPTC_UNORM 0x82D2 +#define GL_VIEW_CLASS_BPTC_FLOAT 0x82D3 +#define GL_UNIFORM 0x92E1 +#define GL_UNIFORM_BLOCK 0x92E2 +#define GL_PROGRAM_INPUT 0x92E3 +#define GL_PROGRAM_OUTPUT 0x92E4 +#define GL_BUFFER_VARIABLE 0x92E5 +#define GL_SHADER_STORAGE_BLOCK 0x92E6 +#define GL_VERTEX_SUBROUTINE 0x92E8 +#define GL_TESS_CONTROL_SUBROUTINE 0x92E9 +#define GL_TESS_EVALUATION_SUBROUTINE 0x92EA +#define GL_GEOMETRY_SUBROUTINE 0x92EB +#define GL_FRAGMENT_SUBROUTINE 0x92EC +#define GL_COMPUTE_SUBROUTINE 0x92ED +#define GL_VERTEX_SUBROUTINE_UNIFORM 0x92EE +#define GL_TESS_CONTROL_SUBROUTINE_UNIFORM 0x92EF +#define GL_TESS_EVALUATION_SUBROUTINE_UNIFORM 0x92F0 +#define GL_GEOMETRY_SUBROUTINE_UNIFORM 0x92F1 +#define GL_FRAGMENT_SUBROUTINE_UNIFORM 0x92F2 +#define GL_COMPUTE_SUBROUTINE_UNIFORM 0x92F3 +#define GL_TRANSFORM_FEEDBACK_VARYING 0x92F4 +#define GL_ACTIVE_RESOURCES 0x92F5 +#define GL_MAX_NAME_LENGTH 0x92F6 +#define GL_MAX_NUM_ACTIVE_VARIABLES 0x92F7 +#define GL_MAX_NUM_COMPATIBLE_SUBROUTINES 0x92F8 +#define GL_NAME_LENGTH 0x92F9 +#define GL_TYPE 0x92FA +#define GL_ARRAY_SIZE 0x92FB +#define GL_OFFSET 0x92FC +#define GL_BLOCK_INDEX 0x92FD +#define GL_ARRAY_STRIDE 0x92FE +#define GL_MATRIX_STRIDE 0x92FF +#define 
GL_IS_ROW_MAJOR 0x9300 +#define GL_ATOMIC_COUNTER_BUFFER_INDEX 0x9301 +#define GL_BUFFER_BINDING 0x9302 +#define GL_BUFFER_DATA_SIZE 0x9303 +#define GL_NUM_ACTIVE_VARIABLES 0x9304 +#define GL_ACTIVE_VARIABLES 0x9305 +#define GL_REFERENCED_BY_VERTEX_SHADER 0x9306 +#define GL_REFERENCED_BY_TESS_CONTROL_SHADER 0x9307 +#define GL_REFERENCED_BY_TESS_EVALUATION_SHADER 0x9308 +#define GL_REFERENCED_BY_GEOMETRY_SHADER 0x9309 +#define GL_REFERENCED_BY_FRAGMENT_SHADER 0x930A +#define GL_REFERENCED_BY_COMPUTE_SHADER 0x930B +#define GL_TOP_LEVEL_ARRAY_SIZE 0x930C +#define GL_TOP_LEVEL_ARRAY_STRIDE 0x930D +#define GL_LOCATION 0x930E +#define GL_LOCATION_INDEX 0x930F +#define GL_IS_PER_PATCH 0x92E7 +#define GL_SHADER_STORAGE_BUFFER 0x90D2 +#define GL_SHADER_STORAGE_BUFFER_BINDING 0x90D3 +#define GL_SHADER_STORAGE_BUFFER_START 0x90D4 +#define GL_SHADER_STORAGE_BUFFER_SIZE 0x90D5 +#define GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS 0x90D6 +#define GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS 0x90D7 +#define GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS 0x90D8 +#define GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS 0x90D9 +#define GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS 0x90DA +#define GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS 0x90DB +#define GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS 0x90DC +#define GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS 0x90DD +#define GL_MAX_SHADER_STORAGE_BLOCK_SIZE 0x90DE +#define GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT 0x90DF +#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000 +#define GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES 0x8F39 +#define GL_DEPTH_STENCIL_TEXTURE_MODE 0x90EA +#define GL_TEXTURE_BUFFER_OFFSET 0x919D +#define GL_TEXTURE_BUFFER_SIZE 0x919E +#define GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT 0x919F +#define GL_TEXTURE_VIEW_MIN_LEVEL 0x82DB +#define GL_TEXTURE_VIEW_NUM_LEVELS 0x82DC +#define GL_TEXTURE_VIEW_MIN_LAYER 0x82DD +#define GL_TEXTURE_VIEW_NUM_LAYERS 0x82DE +#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF +#define GL_VERTEX_ATTRIB_BINDING 0x82D4 +#define 
GL_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D5 +#define GL_VERTEX_BINDING_DIVISOR 0x82D6 +#define GL_VERTEX_BINDING_OFFSET 0x82D7 +#define GL_VERTEX_BINDING_STRIDE 0x82D8 +#define GL_MAX_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D9 +#define GL_MAX_VERTEX_ATTRIB_BINDINGS 0x82DA +#define GL_VERTEX_BINDING_BUFFER 0x8F4F +typedef void (APIENTRYP PFNGLCLEARBUFFERDATAPROC) (GLenum target, GLenum internalformat, GLenum format, GLenum type, const void *data); +typedef void (APIENTRYP PFNGLCLEARBUFFERSUBDATAPROC) (GLenum target, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void *data); +typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC) (GLintptr indirect); +typedef void (APIENTRYP PFNGLCOPYIMAGESUBDATAPROC) (GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth); +typedef void (APIENTRYP PFNGLFRAMEBUFFERPARAMETERIPROC) (GLenum target, GLenum pname, GLint param); +typedef void (APIENTRYP PFNGLGETFRAMEBUFFERPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETINTERNALFORMATI64VPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint64 *params); +typedef void (APIENTRYP PFNGLINVALIDATETEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth); +typedef void (APIENTRYP PFNGLINVALIDATETEXIMAGEPROC) (GLuint texture, GLint level); +typedef void (APIENTRYP PFNGLINVALIDATEBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizeiptr length); +typedef void (APIENTRYP PFNGLINVALIDATEBUFFERDATAPROC) (GLuint buffer); +typedef void (APIENTRYP PFNGLINVALIDATEFRAMEBUFFERPROC) (GLenum target, GLsizei numAttachments, const 
GLenum *attachments); +typedef void (APIENTRYP PFNGLINVALIDATESUBFRAMEBUFFERPROC) (GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTPROC) (GLenum mode, const void *indirect, GLsizei drawcount, GLsizei stride); +typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTPROC) (GLenum mode, GLenum type, const void *indirect, GLsizei drawcount, GLsizei stride); +typedef void (APIENTRYP PFNGLGETPROGRAMINTERFACEIVPROC) (GLuint program, GLenum programInterface, GLenum pname, GLint *params); +typedef GLuint (APIENTRYP PFNGLGETPROGRAMRESOURCEINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name); +typedef void (APIENTRYP PFNGLGETPROGRAMRESOURCENAMEPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name); +typedef void (APIENTRYP PFNGLGETPROGRAMRESOURCEIVPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params); +typedef GLint (APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONPROC) (GLuint program, GLenum programInterface, const GLchar *name); +typedef GLint (APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name); +typedef void (APIENTRYP PFNGLSHADERSTORAGEBLOCKBINDINGPROC) (GLuint program, GLuint storageBlockIndex, GLuint storageBlockBinding); +typedef void (APIENTRYP PFNGLTEXBUFFERRANGEPROC) (GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size); +typedef void (APIENTRYP PFNGLTEXSTORAGE2DMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +typedef void (APIENTRYP PFNGLTEXSTORAGE3DMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); 
+typedef void (APIENTRYP PFNGLTEXTUREVIEWPROC) (GLuint texture, GLenum target, GLuint origtexture, GLenum internalformat, GLuint minlevel, GLuint numlevels, GLuint minlayer, GLuint numlayers); +typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERPROC) (GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); +typedef void (APIENTRYP PFNGLVERTEXATTRIBFORMATPROC) (GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); +typedef void (APIENTRYP PFNGLVERTEXATTRIBIFORMATPROC) (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +typedef void (APIENTRYP PFNGLVERTEXATTRIBLFORMATPROC) (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +typedef void (APIENTRYP PFNGLVERTEXATTRIBBINDINGPROC) (GLuint attribindex, GLuint bindingindex); +typedef void (APIENTRYP PFNGLVERTEXBINDINGDIVISORPROC) (GLuint bindingindex, GLuint divisor); +typedef void (APIENTRYP PFNGLDEBUGMESSAGECONTROLPROC) (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled); +typedef void (APIENTRYP PFNGLDEBUGMESSAGEINSERTPROC) (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf); +typedef void (APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC) (GLDEBUGPROC callback, const void *userParam); +typedef GLuint (APIENTRYP PFNGLGETDEBUGMESSAGELOGPROC) (GLuint count, GLsizei bufSize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog); +typedef void (APIENTRYP PFNGLPUSHDEBUGGROUPPROC) (GLenum source, GLuint id, GLsizei length, const GLchar *message); +typedef void (APIENTRYP PFNGLPOPDEBUGGROUPPROC) (void); +typedef void (APIENTRYP PFNGLOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei length, const GLchar *label); +typedef void (APIENTRYP PFNGLGETOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label); +typedef void (APIENTRYP PFNGLOBJECTPTRLABELPROC) (const void *ptr, 
GLsizei length, const GLchar *label); +typedef void (APIENTRYP PFNGLGETOBJECTPTRLABELPROC) (const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glClearBufferData (GLenum target, GLenum internalformat, GLenum format, GLenum type, const void *data); +GLAPI void APIENTRY glClearBufferSubData (GLenum target, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void *data); +GLAPI void APIENTRY glDispatchCompute (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +GLAPI void APIENTRY glDispatchComputeIndirect (GLintptr indirect); +GLAPI void APIENTRY glCopyImageSubData (GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth); +GLAPI void APIENTRY glFramebufferParameteri (GLenum target, GLenum pname, GLint param); +GLAPI void APIENTRY glGetFramebufferParameteriv (GLenum target, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetInternalformati64v (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint64 *params); +GLAPI void APIENTRY glInvalidateTexSubImage (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth); +GLAPI void APIENTRY glInvalidateTexImage (GLuint texture, GLint level); +GLAPI void APIENTRY glInvalidateBufferSubData (GLuint buffer, GLintptr offset, GLsizeiptr length); +GLAPI void APIENTRY glInvalidateBufferData (GLuint buffer); +GLAPI void APIENTRY glInvalidateFramebuffer (GLenum target, GLsizei numAttachments, const GLenum *attachments); +GLAPI void APIENTRY glInvalidateSubFramebuffer (GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI void APIENTRY glMultiDrawArraysIndirect (GLenum mode, const void *indirect, 
GLsizei drawcount, GLsizei stride); +GLAPI void APIENTRY glMultiDrawElementsIndirect (GLenum mode, GLenum type, const void *indirect, GLsizei drawcount, GLsizei stride); +GLAPI void APIENTRY glGetProgramInterfaceiv (GLuint program, GLenum programInterface, GLenum pname, GLint *params); +GLAPI GLuint APIENTRY glGetProgramResourceIndex (GLuint program, GLenum programInterface, const GLchar *name); +GLAPI void APIENTRY glGetProgramResourceName (GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name); +GLAPI void APIENTRY glGetProgramResourceiv (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params); +GLAPI GLint APIENTRY glGetProgramResourceLocation (GLuint program, GLenum programInterface, const GLchar *name); +GLAPI GLint APIENTRY glGetProgramResourceLocationIndex (GLuint program, GLenum programInterface, const GLchar *name); +GLAPI void APIENTRY glShaderStorageBlockBinding (GLuint program, GLuint storageBlockIndex, GLuint storageBlockBinding); +GLAPI void APIENTRY glTexBufferRange (GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size); +GLAPI void APIENTRY glTexStorage2DMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +GLAPI void APIENTRY glTexStorage3DMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); +GLAPI void APIENTRY glTextureView (GLuint texture, GLenum target, GLuint origtexture, GLenum internalformat, GLuint minlevel, GLuint numlevels, GLuint minlayer, GLuint numlayers); +GLAPI void APIENTRY glBindVertexBuffer (GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); +GLAPI void APIENTRY glVertexAttribFormat (GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); 
+GLAPI void APIENTRY glVertexAttribIFormat (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +GLAPI void APIENTRY glVertexAttribLFormat (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +GLAPI void APIENTRY glVertexAttribBinding (GLuint attribindex, GLuint bindingindex); +GLAPI void APIENTRY glVertexBindingDivisor (GLuint bindingindex, GLuint divisor); +GLAPI void APIENTRY glDebugMessageControl (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled); +GLAPI void APIENTRY glDebugMessageInsert (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf); +GLAPI void APIENTRY glDebugMessageCallback (GLDEBUGPROC callback, const void *userParam); +GLAPI GLuint APIENTRY glGetDebugMessageLog (GLuint count, GLsizei bufSize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog); +GLAPI void APIENTRY glPushDebugGroup (GLenum source, GLuint id, GLsizei length, const GLchar *message); +GLAPI void APIENTRY glPopDebugGroup (void); +GLAPI void APIENTRY glObjectLabel (GLenum identifier, GLuint name, GLsizei length, const GLchar *label); +GLAPI void APIENTRY glGetObjectLabel (GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label); +GLAPI void APIENTRY glObjectPtrLabel (const void *ptr, GLsizei length, const GLchar *label); +GLAPI void APIENTRY glGetObjectPtrLabel (const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label); +#endif +#endif /* GL_VERSION_4_3 */ + /* GL_VERSION_4_4 section. Everything up to the matching "#endif GL_VERSION_4_4" is skipped when GL_VERSION_4_4 is already defined; otherwise the macro is defined to 1 and the section supplies, in order: enumerant macros, PFNGL...PROC function-pointer typedefs, and (conditionally) the gl* prototypes. */ +#ifndef GL_VERSION_4_4 +#define GL_VERSION_4_4 1 +#define GL_MAX_VERTEX_ATTRIB_STRIDE 0x82E5 +#define GL_PRIMITIVE_RESTART_FOR_PATCHES_SUPPORTED 0x8221 +#define GL_TEXTURE_BUFFER_BINDING 0x8C2A +#define GL_MAP_PERSISTENT_BIT 0x0040 +#define GL_MAP_COHERENT_BIT 0x0080 +#define GL_DYNAMIC_STORAGE_BIT 0x0100 +#define GL_CLIENT_STORAGE_BIT 0x0200 +#define GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT 0x00004000 +#define 
GL_BUFFER_IMMUTABLE_STORAGE 0x821F +#define GL_BUFFER_STORAGE_FLAGS 0x8220 +#define GL_CLEAR_TEXTURE 0x9365 +#define GL_LOCATION_COMPONENT 0x934A +#define GL_TRANSFORM_FEEDBACK_BUFFER_INDEX 0x934B +#define GL_TRANSFORM_FEEDBACK_BUFFER_STRIDE 0x934C +#define GL_QUERY_BUFFER 0x9192 +#define GL_QUERY_BUFFER_BARRIER_BIT 0x00008000 +#define GL_QUERY_BUFFER_BINDING 0x9193 +#define GL_QUERY_RESULT_NO_WAIT 0x9194 +#define GL_MIRROR_CLAMP_TO_EDGE 0x8743 /* Function-pointer types (PFNGL<NAME>PROC). Each one has the same return type and parameter list as the like-named gl* prototype in the GL_GLEXT_PROTOTYPES part of this section. */ +typedef void (APIENTRYP PFNGLBUFFERSTORAGEPROC) (GLenum target, GLsizeiptr size, const void *data, GLbitfield flags); +typedef void (APIENTRYP PFNGLCLEARTEXIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, const void *data); +typedef void (APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *data); +typedef void (APIENTRYP PFNGLBINDBUFFERSBASEPROC) (GLenum target, GLuint first, GLsizei count, const GLuint *buffers); +typedef void (APIENTRYP PFNGLBINDBUFFERSRANGEPROC) (GLenum target, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizeiptr *sizes); +typedef void (APIENTRYP PFNGLBINDTEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures); +typedef void (APIENTRYP PFNGLBINDSAMPLERSPROC) (GLuint first, GLsizei count, const GLuint *samplers); +typedef void (APIENTRYP PFNGLBINDIMAGETEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures); +typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERSPROC) (GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides); /* Direct prototypes are declared only when GL_GLEXT_PROTOTYPES is defined; without it this section provides just the macros and pointer types above. */ +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glBufferStorage (GLenum target, GLsizeiptr size, const void *data, GLbitfield flags); +GLAPI void APIENTRY glClearTexImage (GLuint texture, GLint level, GLenum format, GLenum type, const void *data); +GLAPI void APIENTRY glClearTexSubImage (GLuint texture, GLint level, GLint 
xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *data); +GLAPI void APIENTRY glBindBuffersBase (GLenum target, GLuint first, GLsizei count, const GLuint *buffers); +GLAPI void APIENTRY glBindBuffersRange (GLenum target, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizeiptr *sizes); +GLAPI void APIENTRY glBindTextures (GLuint first, GLsizei count, const GLuint *textures); +GLAPI void APIENTRY glBindSamplers (GLuint first, GLsizei count, const GLuint *samplers); +GLAPI void APIENTRY glBindImageTextures (GLuint first, GLsizei count, const GLuint *textures); +GLAPI void APIENTRY glBindVertexBuffers (GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides); +#endif +#endif /* GL_VERSION_4_4 */ + +#ifndef GL_VERSION_4_5 +#define GL_VERSION_4_5 1 +#define GL_CONTEXT_LOST 0x0507 +#define GL_NEGATIVE_ONE_TO_ONE 0x935E +#define GL_ZERO_TO_ONE 0x935F +#define GL_CLIP_ORIGIN 0x935C +#define GL_CLIP_DEPTH_MODE 0x935D +#define GL_QUERY_WAIT_INVERTED 0x8E17 +#define GL_QUERY_NO_WAIT_INVERTED 0x8E18 +#define GL_QUERY_BY_REGION_WAIT_INVERTED 0x8E19 +#define GL_QUERY_BY_REGION_NO_WAIT_INVERTED 0x8E1A +#define GL_MAX_CULL_DISTANCES 0x82F9 +#define GL_MAX_COMBINED_CLIP_AND_CULL_DISTANCES 0x82FA +#define GL_TEXTURE_TARGET 0x1006 +#define GL_QUERY_TARGET 0x82EA +#define GL_TEXTURE_BINDING 0x82EB +#define GL_GUILTY_CONTEXT_RESET 0x8253 +#define GL_INNOCENT_CONTEXT_RESET 0x8254 +#define GL_UNKNOWN_CONTEXT_RESET 0x8255 +#define GL_RESET_NOTIFICATION_STRATEGY 0x8256 +#define GL_LOSE_CONTEXT_ON_RESET 0x8252 +#define GL_NO_RESET_NOTIFICATION 0x8261 +#define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT 0x00000004 +#define GL_CONTEXT_RELEASE_BEHAVIOR 0x82FB +#define GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH 0x82FC +typedef void (APIENTRYP PFNGLCLIPCONTROLPROC) (GLenum origin, GLenum depth); +typedef void (APIENTRYP PFNGLCREATETRANSFORMFEEDBACKSPROC) 
(GLsizei n, GLuint *ids); +typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC) (GLuint xfb, GLuint index, GLuint buffer); +typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC) (GLuint xfb, GLuint index, GLuint buffer, GLintptr offset, GLsizei size); +typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKIVPROC) (GLuint xfb, GLenum pname, GLint *param); +typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKI_VPROC) (GLuint xfb, GLenum pname, GLuint index, GLint *param); +typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKI64_VPROC) (GLuint xfb, GLenum pname, GLuint index, GLint64 *param); +typedef void (APIENTRYP PFNGLCREATEBUFFERSPROC) (GLsizei n, GLuint *buffers); +typedef void (APIENTRYP PFNGLNAMEDBUFFERSTORAGEPROC) (GLuint buffer, GLsizei size, const void *data, GLbitfield flags); +typedef void (APIENTRYP PFNGLNAMEDBUFFERDATAPROC) (GLuint buffer, GLsizei size, const void *data, GLenum usage); +typedef void (APIENTRYP PFNGLNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizei size, const void *data); +typedef void (APIENTRYP PFNGLCOPYNAMEDBUFFERSUBDATAPROC) (GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizei size); +typedef void (APIENTRYP PFNGLCLEARNAMEDBUFFERDATAPROC) (GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void *data); +typedef void (APIENTRYP PFNGLCLEARNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLenum internalformat, GLintptr offset, GLsizei size, GLenum format, GLenum type, const void *data); +typedef void *(APIENTRYP PFNGLMAPNAMEDBUFFERPROC) (GLuint buffer, GLenum access); +typedef void *(APIENTRYP PFNGLMAPNAMEDBUFFERRANGEPROC) (GLuint buffer, GLintptr offset, GLsizei length, GLbitfield access); +typedef GLboolean (APIENTRYP PFNGLUNMAPNAMEDBUFFERPROC) (GLuint buffer); +typedef void (APIENTRYP PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC) (GLuint buffer, GLintptr offset, GLsizei length); +typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERIVPROC) (GLuint buffer, GLenum pname, 
GLint *params); +typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERI64VPROC) (GLuint buffer, GLenum pname, GLint64 *params); +typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPOINTERVPROC) (GLuint buffer, GLenum pname, void **params); +typedef void (APIENTRYP PFNGLGETNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizei size, void *data); +typedef void (APIENTRYP PFNGLCREATEFRAMEBUFFERSPROC) (GLsizei n, GLuint *framebuffers); +typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC) (GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC) (GLuint framebuffer, GLenum pname, GLint param); +typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level); +typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer); +typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC) (GLuint framebuffer, GLenum buf); +typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC) (GLuint framebuffer, GLsizei n, const GLenum *bufs); +typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC) (GLuint framebuffer, GLenum src); +typedef void (APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC) (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments); +typedef void (APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC) (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERIVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint *value); +typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint *value); +typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, 
const GLfloat *value); +typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFIPROC) (GLuint framebuffer, GLenum buffer, const GLfloat depth, GLint stencil); +typedef void (APIENTRYP PFNGLBLITNAMEDFRAMEBUFFERPROC) (GLuint readFramebuffer, GLuint drawFramebuffer, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +typedef GLenum (APIENTRYP PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC) (GLuint framebuffer, GLenum target); +typedef void (APIENTRYP PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC) (GLuint framebuffer, GLenum pname, GLint *param); +typedef void (APIENTRYP PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC) (GLuint framebuffer, GLenum attachment, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLCREATERENDERBUFFERSPROC) (GLsizei n, GLuint *renderbuffers); +typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEPROC) (GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC) (GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC) (GLuint renderbuffer, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLCREATETEXTURESPROC) (GLenum target, GLsizei n, GLuint *textures); +typedef void (APIENTRYP PFNGLTEXTUREBUFFERPROC) (GLuint texture, GLenum internalformat, GLuint buffer); +typedef void (APIENTRYP PFNGLTEXTUREBUFFERRANGEPROC) (GLuint texture, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizei size); +typedef void (APIENTRYP PFNGLTEXTURESTORAGE1DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei 
width, GLsizei height, GLsizei depth); +typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC) (GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC) (GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); +typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE1DPROC) (GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void *pixels); +typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels); +typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE3DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels); +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC) (GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void *data); +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data); +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data); +typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE1DPROC) (GLuint texture, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE3DPROC) (GLuint texture, 
GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFPROC) (GLuint texture, GLenum pname, GLfloat param); +typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFVPROC) (GLuint texture, GLenum pname, const GLfloat *param); +typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIPROC) (GLuint texture, GLenum pname, GLint param); +typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIIVPROC) (GLuint texture, GLenum pname, const GLint *params); +typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIUIVPROC) (GLuint texture, GLenum pname, const GLuint *params); +typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIVPROC) (GLuint texture, GLenum pname, const GLint *param); +typedef void (APIENTRYP PFNGLGENERATETEXTUREMIPMAPPROC) (GLuint texture); +typedef void (APIENTRYP PFNGLBINDTEXTUREUNITPROC) (GLuint unit, GLuint texture); +typedef void (APIENTRYP PFNGLGETTEXTUREIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels); +typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC) (GLuint texture, GLint level, GLsizei bufSize, void *pixels); +typedef void (APIENTRYP PFNGLGETTEXTURELEVELPARAMETERFVPROC) (GLuint texture, GLint level, GLenum pname, GLfloat *params); +typedef void (APIENTRYP PFNGLGETTEXTURELEVELPARAMETERIVPROC) (GLuint texture, GLint level, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERFVPROC) (GLuint texture, GLenum pname, GLfloat *params); +typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIIVPROC) (GLuint texture, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIUIVPROC) (GLuint texture, GLenum pname, GLuint *params); +typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIVPROC) (GLuint texture, GLenum pname, GLint *params); +typedef void (APIENTRYP PFNGLCREATEVERTEXARRAYSPROC) (GLsizei n, GLuint *arrays); +typedef void (APIENTRYP PFNGLDISABLEVERTEXARRAYATTRIBPROC) (GLuint vaobj, GLuint index); 
+typedef void (APIENTRYP PFNGLENABLEVERTEXARRAYATTRIBPROC) (GLuint vaobj, GLuint index); +typedef void (APIENTRYP PFNGLVERTEXARRAYELEMENTBUFFERPROC) (GLuint vaobj, GLuint buffer); +typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERPROC) (GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); +typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERSPROC) (GLuint vaobj, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides); +typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBBINDINGPROC) (GLuint vaobj, GLuint attribindex, GLuint bindingindex); +typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); +typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBIFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBLFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +typedef void (APIENTRYP PFNGLVERTEXARRAYBINDINGDIVISORPROC) (GLuint vaobj, GLuint bindingindex, GLuint divisor); +typedef void (APIENTRYP PFNGLGETVERTEXARRAYIVPROC) (GLuint vaobj, GLenum pname, GLint *param); +typedef void (APIENTRYP PFNGLGETVERTEXARRAYINDEXEDIVPROC) (GLuint vaobj, GLuint index, GLenum pname, GLint *param); +typedef void (APIENTRYP PFNGLGETVERTEXARRAYINDEXED64IVPROC) (GLuint vaobj, GLuint index, GLenum pname, GLint64 *param); +typedef void (APIENTRYP PFNGLCREATESAMPLERSPROC) (GLsizei n, GLuint *samplers); +typedef void (APIENTRYP PFNGLCREATEPROGRAMPIPELINESPROC) (GLsizei n, GLuint *pipelines); +typedef void (APIENTRYP PFNGLCREATEQUERIESPROC) (GLenum target, GLsizei n, GLuint *ids); +typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTI64VPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTIVPROC) (GLuint id, GLuint buffer, GLenum pname, 
GLintptr offset); +typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTUI64VPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTUIVPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +typedef void (APIENTRYP PFNGLMEMORYBARRIERBYREGIONPROC) (GLbitfield barriers); +typedef void (APIENTRYP PFNGLGETTEXTURESUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void *pixels); +typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei bufSize, void *pixels); +typedef GLenum (APIENTRYP PFNGLGETGRAPHICSRESETSTATUSPROC) (void); +typedef void (APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEPROC) (GLenum target, GLint lod, GLsizei bufSize, void *pixels); +typedef void (APIENTRYP PFNGLGETNTEXIMAGEPROC) (GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels); +typedef void (APIENTRYP PFNGLGETNUNIFORMDVPROC) (GLuint program, GLint location, GLsizei bufSize, GLdouble *params); +typedef void (APIENTRYP PFNGLGETNUNIFORMFVPROC) (GLuint program, GLint location, GLsizei bufSize, GLfloat *params); +typedef void (APIENTRYP PFNGLGETNUNIFORMIVPROC) (GLuint program, GLint location, GLsizei bufSize, GLint *params); +typedef void (APIENTRYP PFNGLGETNUNIFORMUIVPROC) (GLuint program, GLint location, GLsizei bufSize, GLuint *params); +typedef void (APIENTRYP PFNGLREADNPIXELSPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +typedef void (APIENTRYP PFNGLTEXTUREBARRIERPROC) (void); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glClipControl (GLenum origin, GLenum depth); +GLAPI void APIENTRY glCreateTransformFeedbacks (GLsizei n, GLuint *ids); +GLAPI void APIENTRY 
glTransformFeedbackBufferBase (GLuint xfb, GLuint index, GLuint buffer); +GLAPI void APIENTRY glTransformFeedbackBufferRange (GLuint xfb, GLuint index, GLuint buffer, GLintptr offset, GLsizei size); +GLAPI void APIENTRY glGetTransformFeedbackiv (GLuint xfb, GLenum pname, GLint *param); +GLAPI void APIENTRY glGetTransformFeedbacki_v (GLuint xfb, GLenum pname, GLuint index, GLint *param); +GLAPI void APIENTRY glGetTransformFeedbacki64_v (GLuint xfb, GLenum pname, GLuint index, GLint64 *param); +GLAPI void APIENTRY glCreateBuffers (GLsizei n, GLuint *buffers); +GLAPI void APIENTRY glNamedBufferStorage (GLuint buffer, GLsizei size, const void *data, GLbitfield flags); +GLAPI void APIENTRY glNamedBufferData (GLuint buffer, GLsizei size, const void *data, GLenum usage); +GLAPI void APIENTRY glNamedBufferSubData (GLuint buffer, GLintptr offset, GLsizei size, const void *data); +GLAPI void APIENTRY glCopyNamedBufferSubData (GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizei size); +GLAPI void APIENTRY glClearNamedBufferData (GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void *data); +GLAPI void APIENTRY glClearNamedBufferSubData (GLuint buffer, GLenum internalformat, GLintptr offset, GLsizei size, GLenum format, GLenum type, const void *data); +GLAPI void *APIENTRY glMapNamedBuffer (GLuint buffer, GLenum access); +GLAPI void *APIENTRY glMapNamedBufferRange (GLuint buffer, GLintptr offset, GLsizei length, GLbitfield access); +GLAPI GLboolean APIENTRY glUnmapNamedBuffer (GLuint buffer); +GLAPI void APIENTRY glFlushMappedNamedBufferRange (GLuint buffer, GLintptr offset, GLsizei length); +GLAPI void APIENTRY glGetNamedBufferParameteriv (GLuint buffer, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetNamedBufferParameteri64v (GLuint buffer, GLenum pname, GLint64 *params); +GLAPI void APIENTRY glGetNamedBufferPointerv (GLuint buffer, GLenum pname, void **params); +GLAPI void APIENTRY glGetNamedBufferSubData 
(GLuint buffer, GLintptr offset, GLsizei size, void *data); +GLAPI void APIENTRY glCreateFramebuffers (GLsizei n, GLuint *framebuffers); +GLAPI void APIENTRY glNamedFramebufferRenderbuffer (GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GLAPI void APIENTRY glNamedFramebufferParameteri (GLuint framebuffer, GLenum pname, GLint param); +GLAPI void APIENTRY glNamedFramebufferTexture (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level); +GLAPI void APIENTRY glNamedFramebufferTextureLayer (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer); +GLAPI void APIENTRY glNamedFramebufferDrawBuffer (GLuint framebuffer, GLenum buf); +GLAPI void APIENTRY glNamedFramebufferDrawBuffers (GLuint framebuffer, GLsizei n, const GLenum *bufs); +GLAPI void APIENTRY glNamedFramebufferReadBuffer (GLuint framebuffer, GLenum src); +GLAPI void APIENTRY glInvalidateNamedFramebufferData (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments); +GLAPI void APIENTRY glInvalidateNamedFramebufferSubData (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI void APIENTRY glClearNamedFramebufferiv (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint *value); +GLAPI void APIENTRY glClearNamedFramebufferuiv (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint *value); +GLAPI void APIENTRY glClearNamedFramebufferfv (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat *value); +GLAPI void APIENTRY glClearNamedFramebufferfi (GLuint framebuffer, GLenum buffer, const GLfloat depth, GLint stencil); +GLAPI void APIENTRY glBlitNamedFramebuffer (GLuint readFramebuffer, GLuint drawFramebuffer, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +GLAPI GLenum APIENTRY glCheckNamedFramebufferStatus (GLuint 
framebuffer, GLenum target); +GLAPI void APIENTRY glGetNamedFramebufferParameteriv (GLuint framebuffer, GLenum pname, GLint *param); +GLAPI void APIENTRY glGetNamedFramebufferAttachmentParameteriv (GLuint framebuffer, GLenum attachment, GLenum pname, GLint *params); +GLAPI void APIENTRY glCreateRenderbuffers (GLsizei n, GLuint *renderbuffers); +GLAPI void APIENTRY glNamedRenderbufferStorage (GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glNamedRenderbufferStorageMultisample (GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glGetNamedRenderbufferParameteriv (GLuint renderbuffer, GLenum pname, GLint *params); +GLAPI void APIENTRY glCreateTextures (GLenum target, GLsizei n, GLuint *textures); +GLAPI void APIENTRY glTextureBuffer (GLuint texture, GLenum internalformat, GLuint buffer); +GLAPI void APIENTRY glTextureBufferRange (GLuint texture, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizei size); +GLAPI void APIENTRY glTextureStorage1D (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width); +GLAPI void APIENTRY glTextureStorage2D (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glTextureStorage3D (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GLAPI void APIENTRY glTextureStorage2DMultisample (GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +GLAPI void APIENTRY glTextureStorage3DMultisample (GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); +GLAPI void APIENTRY glTextureSubImage1D (GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void *pixels); +GLAPI void APIENTRY glTextureSubImage2D (GLuint texture, GLint level, 
GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels); +GLAPI void APIENTRY glTextureSubImage3D (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels); +GLAPI void APIENTRY glCompressedTextureSubImage1D (GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void *data); +GLAPI void APIENTRY glCompressedTextureSubImage2D (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data); +GLAPI void APIENTRY glCompressedTextureSubImage3D (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data); +GLAPI void APIENTRY glCopyTextureSubImage1D (GLuint texture, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +GLAPI void APIENTRY glCopyTextureSubImage2D (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI void APIENTRY glCopyTextureSubImage3D (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI void APIENTRY glTextureParameterf (GLuint texture, GLenum pname, GLfloat param); +GLAPI void APIENTRY glTextureParameterfv (GLuint texture, GLenum pname, const GLfloat *param); +GLAPI void APIENTRY glTextureParameteri (GLuint texture, GLenum pname, GLint param); +GLAPI void APIENTRY glTextureParameterIiv (GLuint texture, GLenum pname, const GLint *params); +GLAPI void APIENTRY glTextureParameterIuiv (GLuint texture, GLenum pname, const GLuint *params); +GLAPI void APIENTRY glTextureParameteriv (GLuint texture, GLenum pname, const GLint *param); +GLAPI void APIENTRY glGenerateTextureMipmap (GLuint texture); +GLAPI void APIENTRY 
glBindTextureUnit (GLuint unit, GLuint texture); +GLAPI void APIENTRY glGetTextureImage (GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels); +GLAPI void APIENTRY glGetCompressedTextureImage (GLuint texture, GLint level, GLsizei bufSize, void *pixels); +GLAPI void APIENTRY glGetTextureLevelParameterfv (GLuint texture, GLint level, GLenum pname, GLfloat *params); +GLAPI void APIENTRY glGetTextureLevelParameteriv (GLuint texture, GLint level, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetTextureParameterfv (GLuint texture, GLenum pname, GLfloat *params); +GLAPI void APIENTRY glGetTextureParameterIiv (GLuint texture, GLenum pname, GLint *params); +GLAPI void APIENTRY glGetTextureParameterIuiv (GLuint texture, GLenum pname, GLuint *params); +GLAPI void APIENTRY glGetTextureParameteriv (GLuint texture, GLenum pname, GLint *params); +GLAPI void APIENTRY glCreateVertexArrays (GLsizei n, GLuint *arrays); +GLAPI void APIENTRY glDisableVertexArrayAttrib (GLuint vaobj, GLuint index); +GLAPI void APIENTRY glEnableVertexArrayAttrib (GLuint vaobj, GLuint index); +GLAPI void APIENTRY glVertexArrayElementBuffer (GLuint vaobj, GLuint buffer); +GLAPI void APIENTRY glVertexArrayVertexBuffer (GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); +GLAPI void APIENTRY glVertexArrayVertexBuffers (GLuint vaobj, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides); +GLAPI void APIENTRY glVertexArrayAttribBinding (GLuint vaobj, GLuint attribindex, GLuint bindingindex); +GLAPI void APIENTRY glVertexArrayAttribFormat (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); +GLAPI void APIENTRY glVertexArrayAttribIFormat (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +GLAPI void APIENTRY glVertexArrayAttribLFormat (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint 
relativeoffset); +GLAPI void APIENTRY glVertexArrayBindingDivisor (GLuint vaobj, GLuint bindingindex, GLuint divisor); +GLAPI void APIENTRY glGetVertexArrayiv (GLuint vaobj, GLenum pname, GLint *param); +GLAPI void APIENTRY glGetVertexArrayIndexediv (GLuint vaobj, GLuint index, GLenum pname, GLint *param); +GLAPI void APIENTRY glGetVertexArrayIndexed64iv (GLuint vaobj, GLuint index, GLenum pname, GLint64 *param); +GLAPI void APIENTRY glCreateSamplers (GLsizei n, GLuint *samplers); +GLAPI void APIENTRY glCreateProgramPipelines (GLsizei n, GLuint *pipelines); +GLAPI void APIENTRY glCreateQueries (GLenum target, GLsizei n, GLuint *ids); +GLAPI void APIENTRY glGetQueryBufferObjecti64v (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +GLAPI void APIENTRY glGetQueryBufferObjectiv (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +GLAPI void APIENTRY glGetQueryBufferObjectui64v (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +GLAPI void APIENTRY glGetQueryBufferObjectuiv (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +GLAPI void APIENTRY glMemoryBarrierByRegion (GLbitfield barriers); +GLAPI void APIENTRY glGetTextureSubImage (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void *pixels); +GLAPI void APIENTRY glGetCompressedTextureSubImage (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei bufSize, void *pixels); +GLAPI GLenum APIENTRY glGetGraphicsResetStatus (void); +GLAPI void APIENTRY glGetnCompressedTexImage (GLenum target, GLint lod, GLsizei bufSize, void *pixels); +GLAPI void APIENTRY glGetnTexImage (GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels); +GLAPI void APIENTRY glGetnUniformdv (GLuint program, GLint location, GLsizei bufSize, GLdouble *params); +GLAPI void APIENTRY glGetnUniformfv 
(GLuint program, GLint location, GLsizei bufSize, GLfloat *params); +GLAPI void APIENTRY glGetnUniformiv (GLuint program, GLint location, GLsizei bufSize, GLint *params); +GLAPI void APIENTRY glGetnUniformuiv (GLuint program, GLint location, GLsizei bufSize, GLuint *params); +GLAPI void APIENTRY glReadnPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +GLAPI void APIENTRY glTextureBarrier (void); +#endif /* GL_GLEXT_PROTOTYPES */ +#endif /* GL_VERSION_4_5 */ + +#ifndef GL_ARB_ES2_compatibility +#define GL_ARB_ES2_compatibility 1 /* marker only: this header declares nothing else under this guard */ +#endif /* GL_ARB_ES2_compatibility */ + +#ifndef GL_ARB_ES3_1_compatibility +#define GL_ARB_ES3_1_compatibility 1 +#endif /* GL_ARB_ES3_1_compatibility */ + +#ifndef GL_ARB_ES3_compatibility +#define GL_ARB_ES3_compatibility 1 +#endif /* GL_ARB_ES3_compatibility */ + +#ifndef GL_ARB_arrays_of_arrays +#define GL_ARB_arrays_of_arrays 1 +#endif /* GL_ARB_arrays_of_arrays */ + +#ifndef GL_ARB_base_instance +#define GL_ARB_base_instance 1 +#endif /* GL_ARB_base_instance */ + +#ifndef GL_ARB_bindless_texture +#define GL_ARB_bindless_texture 1 /* handles are passed and returned as GLuint64 by the entry points under this guard */ +typedef uint64_t GLuint64EXT; /* under this guard, used only by the three VertexAttribL ui64 entry points */ +#define GL_UNSIGNED_INT64_ARB 0x140F +typedef GLuint64 (APIENTRYP PFNGLGETTEXTUREHANDLEARBPROC) (GLuint texture); +typedef GLuint64 (APIENTRYP PFNGLGETTEXTURESAMPLERHANDLEARBPROC) (GLuint texture, GLuint sampler); +typedef void (APIENTRYP PFNGLMAKETEXTUREHANDLERESIDENTARBPROC) (GLuint64 handle); +typedef void (APIENTRYP PFNGLMAKETEXTUREHANDLENONRESIDENTARBPROC) (GLuint64 handle); +typedef GLuint64 (APIENTRYP PFNGLGETIMAGEHANDLEARBPROC) (GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum format); +typedef void (APIENTRYP PFNGLMAKEIMAGEHANDLERESIDENTARBPROC) (GLuint64 handle, GLenum access); +typedef void (APIENTRYP PFNGLMAKEIMAGEHANDLENONRESIDENTARBPROC) (GLuint64 handle); +typedef void (APIENTRYP PFNGLUNIFORMHANDLEUI64ARBPROC) (GLint location, GLuint64 value); +typedef void (APIENTRYP PFNGLUNIFORMHANDLEUI64VARBPROC) 
(GLint location, GLsizei count, const GLuint64 *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMHANDLEUI64ARBPROC) (GLuint program, GLint location, GLuint64 value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMHANDLEUI64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *values); +typedef GLboolean (APIENTRYP PFNGLISTEXTUREHANDLERESIDENTARBPROC) (GLuint64 handle); +typedef GLboolean (APIENTRYP PFNGLISIMAGEHANDLERESIDENTARBPROC) (GLuint64 handle); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL1UI64ARBPROC) (GLuint index, GLuint64EXT x); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL1UI64VARBPROC) (GLuint index, const GLuint64EXT *v); +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBLUI64VARBPROC) (GLuint index, GLenum pname, GLuint64EXT *params); +#ifdef GL_GLEXT_PROTOTYPES /* one direct prototype per function-pointer typedef of this guard, with the same signature */ +GLAPI GLuint64 APIENTRY glGetTextureHandleARB (GLuint texture); +GLAPI GLuint64 APIENTRY glGetTextureSamplerHandleARB (GLuint texture, GLuint sampler); +GLAPI void APIENTRY glMakeTextureHandleResidentARB (GLuint64 handle); +GLAPI void APIENTRY glMakeTextureHandleNonResidentARB (GLuint64 handle); +GLAPI GLuint64 APIENTRY glGetImageHandleARB (GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum format); +GLAPI void APIENTRY glMakeImageHandleResidentARB (GLuint64 handle, GLenum access); +GLAPI void APIENTRY glMakeImageHandleNonResidentARB (GLuint64 handle); +GLAPI void APIENTRY glUniformHandleui64ARB (GLint location, GLuint64 value); +GLAPI void APIENTRY glUniformHandleui64vARB (GLint location, GLsizei count, const GLuint64 *value); +GLAPI void APIENTRY glProgramUniformHandleui64ARB (GLuint program, GLint location, GLuint64 value); +GLAPI void APIENTRY glProgramUniformHandleui64vARB (GLuint program, GLint location, GLsizei count, const GLuint64 *values); +GLAPI GLboolean APIENTRY glIsTextureHandleResidentARB (GLuint64 handle); +GLAPI GLboolean APIENTRY glIsImageHandleResidentARB (GLuint64 handle); +GLAPI void APIENTRY glVertexAttribL1ui64ARB (GLuint index, GLuint64EXT 
x); +GLAPI void APIENTRY glVertexAttribL1ui64vARB (GLuint index, const GLuint64EXT *v); +GLAPI void APIENTRY glGetVertexAttribLui64vARB (GLuint index, GLenum pname, GLuint64EXT *params); +#endif /* GL_GLEXT_PROTOTYPES */ +#endif /* GL_ARB_bindless_texture */ + +#ifndef GL_ARB_blend_func_extended +#define GL_ARB_blend_func_extended 1 +#endif /* GL_ARB_blend_func_extended */ + +#ifndef GL_ARB_buffer_storage +#define GL_ARB_buffer_storage 1 +#endif /* GL_ARB_buffer_storage */ + +#ifndef GL_ARB_cl_event +#define GL_ARB_cl_event 1 +struct _cl_context; /* forward declaration; same struct tag that CL/cl.h uses for cl_context */ +struct _cl_event; /* forward declaration; same struct tag that CL/cl.h uses for cl_event. Only pointers to these structs are used below */ +#define GL_SYNC_CL_EVENT_ARB 0x8240 +#define GL_SYNC_CL_EVENT_COMPLETE_ARB 0x8241 +typedef GLsync (APIENTRYP PFNGLCREATESYNCFROMCLEVENTARBPROC) (struct _cl_context *context, struct _cl_event *event, GLbitfield flags); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI GLsync APIENTRY glCreateSyncFromCLeventARB (struct _cl_context *context, struct _cl_event *event, GLbitfield flags); +#endif /* GL_GLEXT_PROTOTYPES */ +#endif /* GL_ARB_cl_event */ + +#ifndef GL_ARB_clear_buffer_object +#define GL_ARB_clear_buffer_object 1 +#endif /* GL_ARB_clear_buffer_object */ + +#ifndef GL_ARB_clear_texture +#define GL_ARB_clear_texture 1 +#endif /* GL_ARB_clear_texture */ + +#ifndef GL_ARB_clip_control +#define GL_ARB_clip_control 1 +#endif /* GL_ARB_clip_control */ + +#ifndef GL_ARB_compressed_texture_pixel_storage +#define GL_ARB_compressed_texture_pixel_storage 1 +#endif /* GL_ARB_compressed_texture_pixel_storage */ + +#ifndef GL_ARB_compute_shader +#define GL_ARB_compute_shader 1 +#endif /* GL_ARB_compute_shader */ + +#ifndef GL_ARB_compute_variable_group_size +#define GL_ARB_compute_variable_group_size 1 +#define GL_MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB 0x9344 +#define GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS_ARB 0x90EB +#define GL_MAX_COMPUTE_VARIABLE_GROUP_SIZE_ARB 0x9345 +#define GL_MAX_COMPUTE_FIXED_GROUP_SIZE_ARB 0x91BF +typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEGROUPSIZEARBPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z, GLuint group_size_x, GLuint 
group_size_y, GLuint group_size_z); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glDispatchComputeGroupSizeARB (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z, GLuint group_size_x, GLuint group_size_y, GLuint group_size_z); +#endif /* GL_GLEXT_PROTOTYPES */ +#endif /* GL_ARB_compute_variable_group_size */ + +#ifndef GL_ARB_conditional_render_inverted +#define GL_ARB_conditional_render_inverted 1 /* marker only: this header declares nothing else under this guard */ +#endif /* GL_ARB_conditional_render_inverted */ + +#ifndef GL_ARB_conservative_depth +#define GL_ARB_conservative_depth 1 +#endif /* GL_ARB_conservative_depth */ + +#ifndef GL_ARB_copy_buffer +#define GL_ARB_copy_buffer 1 +#endif /* GL_ARB_copy_buffer */ + +#ifndef GL_ARB_copy_image +#define GL_ARB_copy_image 1 +#endif /* GL_ARB_copy_image */ + +#ifndef GL_ARB_cull_distance +#define GL_ARB_cull_distance 1 +#endif /* GL_ARB_cull_distance */ + +#ifndef GL_ARB_debug_output +#define GL_ARB_debug_output 1 +typedef void (APIENTRY *GLDEBUGPROCARB)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); /* callback type taken by glDebugMessageCallbackARB, declared further down in this guard */ +#define GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB 0x8242 +#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH_ARB 0x8243 +#define GL_DEBUG_CALLBACK_FUNCTION_ARB 0x8244 +#define GL_DEBUG_CALLBACK_USER_PARAM_ARB 0x8245 +#define GL_DEBUG_SOURCE_API_ARB 0x8246 +#define GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB 0x8247 +#define GL_DEBUG_SOURCE_SHADER_COMPILER_ARB 0x8248 +#define GL_DEBUG_SOURCE_THIRD_PARTY_ARB 0x8249 +#define GL_DEBUG_SOURCE_APPLICATION_ARB 0x824A +#define GL_DEBUG_SOURCE_OTHER_ARB 0x824B +#define GL_DEBUG_TYPE_ERROR_ARB 0x824C +#define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB 0x824D +#define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB 0x824E +#define GL_DEBUG_TYPE_PORTABILITY_ARB 0x824F +#define GL_DEBUG_TYPE_PERFORMANCE_ARB 0x8250 +#define GL_DEBUG_TYPE_OTHER_ARB 0x8251 +#define GL_MAX_DEBUG_MESSAGE_LENGTH_ARB 0x9143 +#define GL_MAX_DEBUG_LOGGED_MESSAGES_ARB 0x9144 +#define GL_DEBUG_LOGGED_MESSAGES_ARB 0x9145 +#define GL_DEBUG_SEVERITY_HIGH_ARB 0x9146 
+#define GL_DEBUG_SEVERITY_MEDIUM_ARB 0x9147 +#define GL_DEBUG_SEVERITY_LOW_ARB 0x9148 +typedef void (APIENTRYP PFNGLDEBUGMESSAGECONTROLARBPROC) (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled); +typedef void (APIENTRYP PFNGLDEBUGMESSAGEINSERTARBPROC) (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf); +typedef void (APIENTRYP PFNGLDEBUGMESSAGECALLBACKARBPROC) (GLDEBUGPROCARB callback, const void *userParam); +typedef GLuint (APIENTRYP PFNGLGETDEBUGMESSAGELOGARBPROC) (GLuint count, GLsizei bufSize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog); +#ifdef GL_GLEXT_PROTOTYPES /* one direct prototype per function-pointer typedef of this guard, with the same signature */ +GLAPI void APIENTRY glDebugMessageControlARB (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled); +GLAPI void APIENTRY glDebugMessageInsertARB (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf); +GLAPI void APIENTRY glDebugMessageCallbackARB (GLDEBUGPROCARB callback, const void *userParam); +GLAPI GLuint APIENTRY glGetDebugMessageLogARB (GLuint count, GLsizei bufSize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog); +#endif /* GL_GLEXT_PROTOTYPES */ +#endif /* GL_ARB_debug_output */ + +#ifndef GL_ARB_depth_buffer_float +#define GL_ARB_depth_buffer_float 1 +#endif /* GL_ARB_depth_buffer_float */ + +#ifndef GL_ARB_depth_clamp +#define GL_ARB_depth_clamp 1 +#endif /* GL_ARB_depth_clamp */ + +#ifndef GL_ARB_derivative_control +#define GL_ARB_derivative_control 1 +#endif /* GL_ARB_derivative_control */ + +#ifndef GL_ARB_direct_state_access +#define GL_ARB_direct_state_access 1 +#endif /* GL_ARB_direct_state_access */ + +#ifndef GL_ARB_draw_buffers_blend +#define GL_ARB_draw_buffers_blend 1 /* every entry point under this guard takes a GLuint buf as its first parameter */ +typedef void (APIENTRYP PFNGLBLENDEQUATIONIARBPROC) (GLuint buf, GLenum mode); +typedef void (APIENTRYP PFNGLBLENDEQUATIONSEPARATEIARBPROC) (GLuint buf, 
GLenum modeRGB, GLenum modeAlpha); +typedef void (APIENTRYP PFNGLBLENDFUNCIARBPROC) (GLuint buf, GLenum src, GLenum dst); +typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEIARBPROC) (GLuint buf, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glBlendEquationiARB (GLuint buf, GLenum mode); +GLAPI void APIENTRY glBlendEquationSeparateiARB (GLuint buf, GLenum modeRGB, GLenum modeAlpha); +GLAPI void APIENTRY glBlendFunciARB (GLuint buf, GLenum src, GLenum dst); +GLAPI void APIENTRY glBlendFuncSeparateiARB (GLuint buf, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +#endif /* GL_GLEXT_PROTOTYPES */ +#endif /* GL_ARB_draw_buffers_blend */ + +#ifndef GL_ARB_draw_elements_base_vertex +#define GL_ARB_draw_elements_base_vertex 1 /* marker only: this header declares nothing else under this guard; the same holds for every guard that follows on this line of the patch */ +#endif /* GL_ARB_draw_elements_base_vertex */ + +#ifndef GL_ARB_draw_indirect +#define GL_ARB_draw_indirect 1 +#endif /* GL_ARB_draw_indirect */ + +#ifndef GL_ARB_enhanced_layouts +#define GL_ARB_enhanced_layouts 1 +#endif /* GL_ARB_enhanced_layouts */ + +#ifndef GL_ARB_explicit_attrib_location +#define GL_ARB_explicit_attrib_location 1 +#endif /* GL_ARB_explicit_attrib_location */ + +#ifndef GL_ARB_explicit_uniform_location +#define GL_ARB_explicit_uniform_location 1 +#endif /* GL_ARB_explicit_uniform_location */ + +#ifndef GL_ARB_fragment_coord_conventions +#define GL_ARB_fragment_coord_conventions 1 +#endif /* GL_ARB_fragment_coord_conventions */ + +#ifndef GL_ARB_fragment_layer_viewport +#define GL_ARB_fragment_layer_viewport 1 +#endif /* GL_ARB_fragment_layer_viewport */ + +#ifndef GL_ARB_framebuffer_no_attachments +#define GL_ARB_framebuffer_no_attachments 1 +#endif /* GL_ARB_framebuffer_no_attachments */ + +#ifndef GL_ARB_framebuffer_object +#define GL_ARB_framebuffer_object 1 +#endif /* GL_ARB_framebuffer_object */ + +#ifndef GL_ARB_framebuffer_sRGB +#define GL_ARB_framebuffer_sRGB 1 +#endif /* GL_ARB_framebuffer_sRGB */ + +#ifndef GL_ARB_get_program_binary +#define 
GL_ARB_get_program_binary 1 +#endif /* GL_ARB_get_program_binary */ + +#ifndef GL_ARB_get_texture_sub_image +#define GL_ARB_get_texture_sub_image 1 +#endif /* GL_ARB_get_texture_sub_image */ + +#ifndef GL_ARB_gpu_shader5 +#define GL_ARB_gpu_shader5 1 +#endif /* GL_ARB_gpu_shader5 */ + +#ifndef GL_ARB_gpu_shader_fp64 +#define GL_ARB_gpu_shader_fp64 1 +#endif /* GL_ARB_gpu_shader_fp64 */ + +#ifndef GL_ARB_half_float_vertex +#define GL_ARB_half_float_vertex 1 +#endif /* GL_ARB_half_float_vertex */ + +#ifndef GL_ARB_imaging +#define GL_ARB_imaging 1 /* only the two tokens below are declared under this guard; no entry points */ +#define GL_BLEND_COLOR 0x8005 +#define GL_BLEND_EQUATION 0x8009 +#endif /* GL_ARB_imaging */ + +#ifndef GL_ARB_indirect_parameters +#define GL_ARB_indirect_parameters 1 /* in both entry points, indirect and drawcount are GLintptr while maxdrawcount and stride are GLsizei */ +#define GL_PARAMETER_BUFFER_ARB 0x80EE +#define GL_PARAMETER_BUFFER_BINDING_ARB 0x80EF +typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTARBPROC) (GLenum mode, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTARBPROC) (GLenum mode, GLenum type, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glMultiDrawArraysIndirectCountARB (GLenum mode, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +GLAPI void APIENTRY glMultiDrawElementsIndirectCountARB (GLenum mode, GLenum type, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +#endif /* GL_GLEXT_PROTOTYPES */ +#endif /* GL_ARB_indirect_parameters */ + +#ifndef GL_ARB_internalformat_query +#define GL_ARB_internalformat_query 1 +#endif /* GL_ARB_internalformat_query */ + +#ifndef GL_ARB_internalformat_query2 +#define GL_ARB_internalformat_query2 1 /* a single token is declared under this guard; no entry points */ +#define GL_SRGB_DECODE_ARB 0x8299 +#endif /* GL_ARB_internalformat_query2 */ + +#ifndef GL_ARB_invalidate_subdata +#define GL_ARB_invalidate_subdata 1 +#endif /* GL_ARB_invalidate_subdata */ + +#ifndef GL_ARB_map_buffer_alignment +#define GL_ARB_map_buffer_alignment 1 
+#endif /* GL_ARB_map_buffer_alignment */ + +#ifndef GL_ARB_map_buffer_range +#define GL_ARB_map_buffer_range 1 +#endif /* GL_ARB_map_buffer_range */ + +#ifndef GL_ARB_multi_bind +#define GL_ARB_multi_bind 1 +#endif /* GL_ARB_multi_bind */ + +#ifndef GL_ARB_multi_draw_indirect +#define GL_ARB_multi_draw_indirect 1 +#endif /* GL_ARB_multi_draw_indirect */ + +#ifndef GL_ARB_occlusion_query2 +#define GL_ARB_occlusion_query2 1 +#endif /* GL_ARB_occlusion_query2 */ + +#ifndef GL_ARB_pipeline_statistics_query +#define GL_ARB_pipeline_statistics_query 1 /* enumerant tokens only; no entry points are declared under this guard */ +#define GL_VERTICES_SUBMITTED_ARB 0x82EE +#define GL_PRIMITIVES_SUBMITTED_ARB 0x82EF +#define GL_VERTEX_SHADER_INVOCATIONS_ARB 0x82F0 +#define GL_TESS_CONTROL_SHADER_PATCHES_ARB 0x82F1 +#define GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB 0x82F2 +#define GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB 0x82F3 +#define GL_FRAGMENT_SHADER_INVOCATIONS_ARB 0x82F4 +#define GL_COMPUTE_SHADER_INVOCATIONS_ARB 0x82F5 +#define GL_CLIPPING_INPUT_PRIMITIVES_ARB 0x82F6 +#define GL_CLIPPING_OUTPUT_PRIMITIVES_ARB 0x82F7 +#endif /* GL_ARB_pipeline_statistics_query */ + +#ifndef GL_ARB_program_interface_query +#define GL_ARB_program_interface_query 1 +#endif /* GL_ARB_program_interface_query */ + +#ifndef GL_ARB_provoking_vertex +#define GL_ARB_provoking_vertex 1 +#endif /* GL_ARB_provoking_vertex */ + +#ifndef GL_ARB_query_buffer_object +#define GL_ARB_query_buffer_object 1 +#endif /* GL_ARB_query_buffer_object */ + +#ifndef GL_ARB_robust_buffer_access_behavior +#define GL_ARB_robust_buffer_access_behavior 1 +#endif /* GL_ARB_robust_buffer_access_behavior */ + +#ifndef GL_ARB_robustness +#define GL_ARB_robustness 1 /* entry points under this guard are ARB-suffixed forms of glGetGraphicsResetStatus, glGetnTexImage, glGetnCompressedTexImage, glGetnUniform{f,i,ui,d}v and glReadnPixels, which this header also declares under GL_VERSION_4_5 */ +#define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB 0x00000004 +#define GL_LOSE_CONTEXT_ON_RESET_ARB 0x8252 +#define GL_GUILTY_CONTEXT_RESET_ARB 0x8253 +#define GL_INNOCENT_CONTEXT_RESET_ARB 0x8254 +#define GL_UNKNOWN_CONTEXT_RESET_ARB 0x8255 +#define GL_RESET_NOTIFICATION_STRATEGY_ARB 0x8256 +#define GL_NO_RESET_NOTIFICATION_ARB 0x8261 
+typedef GLenum (APIENTRYP PFNGLGETGRAPHICSRESETSTATUSARBPROC) (void); +typedef void (APIENTRYP PFNGLGETNTEXIMAGEARBPROC) (GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *img); +typedef void (APIENTRYP PFNGLREADNPIXELSARBPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +typedef void (APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEARBPROC) (GLenum target, GLint lod, GLsizei bufSize, void *img); +typedef void (APIENTRYP PFNGLGETNUNIFORMFVARBPROC) (GLuint program, GLint location, GLsizei bufSize, GLfloat *params); +typedef void (APIENTRYP PFNGLGETNUNIFORMIVARBPROC) (GLuint program, GLint location, GLsizei bufSize, GLint *params); +typedef void (APIENTRYP PFNGLGETNUNIFORMUIVARBPROC) (GLuint program, GLint location, GLsizei bufSize, GLuint *params); +typedef void (APIENTRYP PFNGLGETNUNIFORMDVARBPROC) (GLuint program, GLint location, GLsizei bufSize, GLdouble *params); +#ifdef GL_GLEXT_PROTOTYPES /* one direct prototype per function-pointer typedef of this guard, with the same signature */ +GLAPI GLenum APIENTRY glGetGraphicsResetStatusARB (void); +GLAPI void APIENTRY glGetnTexImageARB (GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *img); +GLAPI void APIENTRY glReadnPixelsARB (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +GLAPI void APIENTRY glGetnCompressedTexImageARB (GLenum target, GLint lod, GLsizei bufSize, void *img); +GLAPI void APIENTRY glGetnUniformfvARB (GLuint program, GLint location, GLsizei bufSize, GLfloat *params); +GLAPI void APIENTRY glGetnUniformivARB (GLuint program, GLint location, GLsizei bufSize, GLint *params); +GLAPI void APIENTRY glGetnUniformuivARB (GLuint program, GLint location, GLsizei bufSize, GLuint *params); +GLAPI void APIENTRY glGetnUniformdvARB (GLuint program, GLint location, GLsizei bufSize, GLdouble *params); +#endif /* GL_GLEXT_PROTOTYPES */ +#endif /* GL_ARB_robustness */ + +#ifndef GL_ARB_robustness_isolation +#define GL_ARB_robustness_isolation 1 +#endif /* 
GL_ARB_robustness_isolation */ + +#ifndef GL_ARB_sample_shading +#define GL_ARB_sample_shading 1 /* two tokens and one entry point taking a single GLfloat */ +#define GL_SAMPLE_SHADING_ARB 0x8C36 +#define GL_MIN_SAMPLE_SHADING_VALUE_ARB 0x8C37 +typedef void (APIENTRYP PFNGLMINSAMPLESHADINGARBPROC) (GLfloat value); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glMinSampleShadingARB (GLfloat value); +#endif /* GL_GLEXT_PROTOTYPES */ +#endif /* GL_ARB_sample_shading */ + +#ifndef GL_ARB_sampler_objects +#define GL_ARB_sampler_objects 1 +#endif /* GL_ARB_sampler_objects */ + +#ifndef GL_ARB_seamless_cube_map +#define GL_ARB_seamless_cube_map 1 +#endif /* GL_ARB_seamless_cube_map */ + +#ifndef GL_ARB_seamless_cubemap_per_texture +#define GL_ARB_seamless_cubemap_per_texture 1 +#endif /* GL_ARB_seamless_cubemap_per_texture */ + +#ifndef GL_ARB_separate_shader_objects +#define GL_ARB_separate_shader_objects 1 +#endif /* GL_ARB_separate_shader_objects */ + +#ifndef GL_ARB_shader_atomic_counters +#define GL_ARB_shader_atomic_counters 1 +#endif /* GL_ARB_shader_atomic_counters */ + +#ifndef GL_ARB_shader_bit_encoding +#define GL_ARB_shader_bit_encoding 1 +#endif /* GL_ARB_shader_bit_encoding */ + +#ifndef GL_ARB_shader_draw_parameters +#define GL_ARB_shader_draw_parameters 1 +#endif /* GL_ARB_shader_draw_parameters */ + +#ifndef GL_ARB_shader_group_vote +#define GL_ARB_shader_group_vote 1 +#endif /* GL_ARB_shader_group_vote */ + +#ifndef GL_ARB_shader_image_load_store +#define GL_ARB_shader_image_load_store 1 +#endif /* GL_ARB_shader_image_load_store */ + +#ifndef GL_ARB_shader_image_size +#define GL_ARB_shader_image_size 1 +#endif /* GL_ARB_shader_image_size */ + +#ifndef GL_ARB_shader_precision +#define GL_ARB_shader_precision 1 +#endif /* GL_ARB_shader_precision */ + +#ifndef GL_ARB_shader_stencil_export +#define GL_ARB_shader_stencil_export 1 +#endif /* GL_ARB_shader_stencil_export */ + +#ifndef GL_ARB_shader_storage_buffer_object +#define GL_ARB_shader_storage_buffer_object 1 +#endif /* GL_ARB_shader_storage_buffer_object */ + +#ifndef 
GL_ARB_shader_subroutine +#define GL_ARB_shader_subroutine 1 +#endif /* GL_ARB_shader_subroutine */ + +#ifndef GL_ARB_shader_texture_image_samples +#define GL_ARB_shader_texture_image_samples 1 +#endif /* GL_ARB_shader_texture_image_samples */ + +#ifndef GL_ARB_shading_language_420pack +#define GL_ARB_shading_language_420pack 1 +#endif /* GL_ARB_shading_language_420pack */ + +#ifndef GL_ARB_shading_language_include +#define GL_ARB_shading_language_include 1 /* entry points that take a name pass it as a (GLint namelen, const GLchar pointer) pair */ +#define GL_SHADER_INCLUDE_ARB 0x8DAE +#define GL_NAMED_STRING_LENGTH_ARB 0x8DE9 +#define GL_NAMED_STRING_TYPE_ARB 0x8DEA +typedef void (APIENTRYP PFNGLNAMEDSTRINGARBPROC) (GLenum type, GLint namelen, const GLchar *name, GLint stringlen, const GLchar *string); +typedef void (APIENTRYP PFNGLDELETENAMEDSTRINGARBPROC) (GLint namelen, const GLchar *name); +typedef void (APIENTRYP PFNGLCOMPILESHADERINCLUDEARBPROC) (GLuint shader, GLsizei count, const GLchar *const*path, const GLint *length); +typedef GLboolean (APIENTRYP PFNGLISNAMEDSTRINGARBPROC) (GLint namelen, const GLchar *name); +typedef void (APIENTRYP PFNGLGETNAMEDSTRINGARBPROC) (GLint namelen, const GLchar *name, GLsizei bufSize, GLint *stringlen, GLchar *string); +typedef void (APIENTRYP PFNGLGETNAMEDSTRINGIVARBPROC) (GLint namelen, const GLchar *name, GLenum pname, GLint *params); +#ifdef GL_GLEXT_PROTOTYPES /* one direct prototype per function-pointer typedef of this guard, with the same signature */ +GLAPI void APIENTRY glNamedStringARB (GLenum type, GLint namelen, const GLchar *name, GLint stringlen, const GLchar *string); +GLAPI void APIENTRY glDeleteNamedStringARB (GLint namelen, const GLchar *name); +GLAPI void APIENTRY glCompileShaderIncludeARB (GLuint shader, GLsizei count, const GLchar *const*path, const GLint *length); +GLAPI GLboolean APIENTRY glIsNamedStringARB (GLint namelen, const GLchar *name); +GLAPI void APIENTRY glGetNamedStringARB (GLint namelen, const GLchar *name, GLsizei bufSize, GLint *stringlen, GLchar *string); +GLAPI void APIENTRY glGetNamedStringivARB (GLint namelen, const GLchar *name, GLenum pname, GLint *params); +#endif /* GL_GLEXT_PROTOTYPES */ 
+#endif /* GL_ARB_shading_language_include */ + +#ifndef GL_ARB_shading_language_packing +#define GL_ARB_shading_language_packing 1 +#endif /* GL_ARB_shading_language_packing */ + +#ifndef GL_ARB_sparse_buffer +#define GL_ARB_sparse_buffer 1 +#define GL_SPARSE_STORAGE_BIT_ARB 0x0400 +#define GL_SPARSE_BUFFER_PAGE_SIZE_ARB 0x82F8 +typedef void (APIENTRYP PFNGLBUFFERPAGECOMMITMENTARBPROC) (GLenum target, GLintptr offset, GLsizeiptr size, GLboolean commit); +typedef void (APIENTRYP PFNGLNAMEDBUFFERPAGECOMMITMENTEXTPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, GLboolean commit); +typedef void (APIENTRYP PFNGLNAMEDBUFFERPAGECOMMITMENTARBPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, GLboolean commit); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glBufferPageCommitmentARB (GLenum target, GLintptr offset, GLsizeiptr size, GLboolean commit); +GLAPI void APIENTRY glNamedBufferPageCommitmentEXT (GLuint buffer, GLintptr offset, GLsizeiptr size, GLboolean commit); +GLAPI void APIENTRY glNamedBufferPageCommitmentARB (GLuint buffer, GLintptr offset, GLsizeiptr size, GLboolean commit); +#endif +#endif /* GL_ARB_sparse_buffer */ + +#ifndef GL_ARB_sparse_texture +#define GL_ARB_sparse_texture 1 +#define GL_TEXTURE_SPARSE_ARB 0x91A6 +#define GL_VIRTUAL_PAGE_SIZE_INDEX_ARB 0x91A7 +#define GL_NUM_SPARSE_LEVELS_ARB 0x91AA +#define GL_NUM_VIRTUAL_PAGE_SIZES_ARB 0x91A8 +#define GL_VIRTUAL_PAGE_SIZE_X_ARB 0x9195 +#define GL_VIRTUAL_PAGE_SIZE_Y_ARB 0x9196 +#define GL_VIRTUAL_PAGE_SIZE_Z_ARB 0x9197 +#define GL_MAX_SPARSE_TEXTURE_SIZE_ARB 0x9198 +#define GL_MAX_SPARSE_3D_TEXTURE_SIZE_ARB 0x9199 +#define GL_MAX_SPARSE_ARRAY_TEXTURE_LAYERS_ARB 0x919A +#define GL_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS_ARB 0x91A9 +typedef void (APIENTRYP PFNGLTEXPAGECOMMITMENTARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY 
glTexPageCommitmentARB (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident); +#endif +#endif /* GL_ARB_sparse_texture */ + +#ifndef GL_ARB_stencil_texturing +#define GL_ARB_stencil_texturing 1 +#endif /* GL_ARB_stencil_texturing */ + +#ifndef GL_ARB_sync +#define GL_ARB_sync 1 +#endif /* GL_ARB_sync */ + +#ifndef GL_ARB_tessellation_shader +#define GL_ARB_tessellation_shader 1 +#endif /* GL_ARB_tessellation_shader */ + +#ifndef GL_ARB_texture_barrier +#define GL_ARB_texture_barrier 1 +#endif /* GL_ARB_texture_barrier */ + +#ifndef GL_ARB_texture_buffer_object_rgb32 +#define GL_ARB_texture_buffer_object_rgb32 1 +#endif /* GL_ARB_texture_buffer_object_rgb32 */ + +#ifndef GL_ARB_texture_buffer_range +#define GL_ARB_texture_buffer_range 1 +#endif /* GL_ARB_texture_buffer_range */ + +#ifndef GL_ARB_texture_compression_bptc +#define GL_ARB_texture_compression_bptc 1 +#define GL_COMPRESSED_RGBA_BPTC_UNORM_ARB 0x8E8C +#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB 0x8E8D +#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB 0x8E8E +#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB 0x8E8F +#endif /* GL_ARB_texture_compression_bptc */ + +#ifndef GL_ARB_texture_compression_rgtc +#define GL_ARB_texture_compression_rgtc 1 +#endif /* GL_ARB_texture_compression_rgtc */ + +#ifndef GL_ARB_texture_cube_map_array +#define GL_ARB_texture_cube_map_array 1 +#define GL_TEXTURE_CUBE_MAP_ARRAY_ARB 0x9009 +#define GL_TEXTURE_BINDING_CUBE_MAP_ARRAY_ARB 0x900A +#define GL_PROXY_TEXTURE_CUBE_MAP_ARRAY_ARB 0x900B +#define GL_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900C +#define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW_ARB 0x900D +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900E +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900F +#endif /* GL_ARB_texture_cube_map_array */ + +#ifndef GL_ARB_texture_gather +#define GL_ARB_texture_gather 1 +#define GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET_ARB 0x8E5E +#define 
GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET_ARB 0x8E5F +#define GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB 0x8F9F +#endif /* GL_ARB_texture_gather */ + +#ifndef GL_ARB_texture_mirror_clamp_to_edge +#define GL_ARB_texture_mirror_clamp_to_edge 1 +#endif /* GL_ARB_texture_mirror_clamp_to_edge */ + +#ifndef GL_ARB_texture_multisample +#define GL_ARB_texture_multisample 1 +#endif /* GL_ARB_texture_multisample */ + +#ifndef GL_ARB_texture_query_levels +#define GL_ARB_texture_query_levels 1 +#endif /* GL_ARB_texture_query_levels */ + +#ifndef GL_ARB_texture_query_lod +#define GL_ARB_texture_query_lod 1 +#endif /* GL_ARB_texture_query_lod */ + +#ifndef GL_ARB_texture_rg +#define GL_ARB_texture_rg 1 +#endif /* GL_ARB_texture_rg */ + +#ifndef GL_ARB_texture_rgb10_a2ui +#define GL_ARB_texture_rgb10_a2ui 1 +#endif /* GL_ARB_texture_rgb10_a2ui */ + +#ifndef GL_ARB_texture_stencil8 +#define GL_ARB_texture_stencil8 1 +#endif /* GL_ARB_texture_stencil8 */ + +#ifndef GL_ARB_texture_storage +#define GL_ARB_texture_storage 1 +#endif /* GL_ARB_texture_storage */ + +#ifndef GL_ARB_texture_storage_multisample +#define GL_ARB_texture_storage_multisample 1 +#endif /* GL_ARB_texture_storage_multisample */ + +#ifndef GL_ARB_texture_swizzle +#define GL_ARB_texture_swizzle 1 +#endif /* GL_ARB_texture_swizzle */ + +#ifndef GL_ARB_texture_view +#define GL_ARB_texture_view 1 +#endif /* GL_ARB_texture_view */ + +#ifndef GL_ARB_timer_query +#define GL_ARB_timer_query 1 +#endif /* GL_ARB_timer_query */ + +#ifndef GL_ARB_transform_feedback2 +#define GL_ARB_transform_feedback2 1 +#endif /* GL_ARB_transform_feedback2 */ + +#ifndef GL_ARB_transform_feedback3 +#define GL_ARB_transform_feedback3 1 +#endif /* GL_ARB_transform_feedback3 */ + +#ifndef GL_ARB_transform_feedback_instanced +#define GL_ARB_transform_feedback_instanced 1 +#endif /* GL_ARB_transform_feedback_instanced */ + +#ifndef GL_ARB_transform_feedback_overflow_query +#define GL_ARB_transform_feedback_overflow_query 1 +#define 
GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB 0x82EC +#define GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB 0x82ED +#endif /* GL_ARB_transform_feedback_overflow_query */ + +#ifndef GL_ARB_uniform_buffer_object +#define GL_ARB_uniform_buffer_object 1 +#endif /* GL_ARB_uniform_buffer_object */ + +#ifndef GL_ARB_vertex_array_bgra +#define GL_ARB_vertex_array_bgra 1 +#endif /* GL_ARB_vertex_array_bgra */ + +#ifndef GL_ARB_vertex_array_object +#define GL_ARB_vertex_array_object 1 +#endif /* GL_ARB_vertex_array_object */ + +#ifndef GL_ARB_vertex_attrib_64bit +#define GL_ARB_vertex_attrib_64bit 1 +#endif /* GL_ARB_vertex_attrib_64bit */ + +#ifndef GL_ARB_vertex_attrib_binding +#define GL_ARB_vertex_attrib_binding 1 +#endif /* GL_ARB_vertex_attrib_binding */ + +#ifndef GL_ARB_vertex_type_10f_11f_11f_rev +#define GL_ARB_vertex_type_10f_11f_11f_rev 1 +#endif /* GL_ARB_vertex_type_10f_11f_11f_rev */ + +#ifndef GL_ARB_vertex_type_2_10_10_10_rev +#define GL_ARB_vertex_type_2_10_10_10_rev 1 +#endif /* GL_ARB_vertex_type_2_10_10_10_rev */ + +#ifndef GL_ARB_viewport_array +#define GL_ARB_viewport_array 1 +#endif /* GL_ARB_viewport_array */ + +#ifndef GL_KHR_context_flush_control +#define GL_KHR_context_flush_control 1 +#endif /* GL_KHR_context_flush_control */ + +#ifndef GL_KHR_debug +#define GL_KHR_debug 1 +#endif /* GL_KHR_debug */ + +#ifndef GL_KHR_robust_buffer_access_behavior +#define GL_KHR_robust_buffer_access_behavior 1 +#endif /* GL_KHR_robust_buffer_access_behavior */ + +#ifndef GL_KHR_robustness +#define GL_KHR_robustness 1 +#define GL_CONTEXT_ROBUST_ACCESS 0x90F3 +#endif /* GL_KHR_robustness */ + +#ifndef GL_KHR_texture_compression_astc_hdr +#define GL_KHR_texture_compression_astc_hdr 1 +#define GL_COMPRESSED_RGBA_ASTC_4x4_KHR 0x93B0 +#define GL_COMPRESSED_RGBA_ASTC_5x4_KHR 0x93B1 +#define GL_COMPRESSED_RGBA_ASTC_5x5_KHR 0x93B2 +#define GL_COMPRESSED_RGBA_ASTC_6x5_KHR 0x93B3 +#define GL_COMPRESSED_RGBA_ASTC_6x6_KHR 0x93B4 +#define GL_COMPRESSED_RGBA_ASTC_8x5_KHR 0x93B5 +#define 
GL_COMPRESSED_RGBA_ASTC_8x6_KHR 0x93B6 +#define GL_COMPRESSED_RGBA_ASTC_8x8_KHR 0x93B7 +#define GL_COMPRESSED_RGBA_ASTC_10x5_KHR 0x93B8 +#define GL_COMPRESSED_RGBA_ASTC_10x6_KHR 0x93B9 +#define GL_COMPRESSED_RGBA_ASTC_10x8_KHR 0x93BA +#define GL_COMPRESSED_RGBA_ASTC_10x10_KHR 0x93BB +#define GL_COMPRESSED_RGBA_ASTC_12x10_KHR 0x93BC +#define GL_COMPRESSED_RGBA_ASTC_12x12_KHR 0x93BD +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR 0x93D0 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR 0x93D1 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR 0x93D2 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR 0x93D3 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR 0x93D4 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR 0x93D5 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR 0x93D6 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR 0x93D7 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR 0x93D8 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR 0x93D9 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR 0x93DA +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR 0x93DB +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR 0x93DC +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR 0x93DD +#endif /* GL_KHR_texture_compression_astc_hdr */ + +#ifndef GL_KHR_texture_compression_astc_ldr +#define GL_KHR_texture_compression_astc_ldr 1 +#endif /* GL_KHR_texture_compression_astc_ldr */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Kernels/builtin_kernels.cl b/Kernels/builtin_kernels.cl new file mode 100644 index 00000000..466e2fc1 --- /dev/null +++ b/Kernels/builtin_kernels.cl @@ -0,0 +1,131 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to 
permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+*/
+
+#if defined(cl_intel_device_side_vme_enable)
+
+// Motion-estimation kernel built on the Intel device-side VME built-ins.
+//
+// The kernel requires a work-group of exactly 16x1x1 work-items.  Each
+// work-group handles the 16-pixel-wide column selected by get_group_id(0) and
+// walks down it for `iterations` steps, advancing 16 pixels in y per step.
+//
+//   vmeAccelerator  accelerator sampler passed to the VME built-ins
+//   srcImage        image the source coordinates refer to
+//   refImage        image the reference coordinates refer to
+//   predMVs         optional predictor motion vectors, one per 16x16 block
+//   outMVs          resulting motion vectors (layout depends on block type)
+//   outDist         optional distortion values, same indexing as outMVs
+//   iterations      number of 16-pixel rows processed by each work-group
+//
+// When HW_NULL_CHECK is defined the explicit NULL tests on predMVs and outDist
+// are compiled out and the guarded blocks run unconditionally.
+__kernel __attribute__((reqd_work_group_size(16,1,1)))
+void block_motion_estimate_intel(
+    sampler_t vmeAccelerator,
+    __read_only image2d_t srcImage,
+    __read_only image2d_t refImage,
+    __global short2* predMVs,
+    __global short2* outMVs,
+    __global ushort* outDist,
+    int iterations )
+{
+    // Scratch area filled by intel_work_group_vme_mb_query.  Motion vectors are
+    // read back starting at dst[8]; `dist` views the words from dst[40] onward
+    // as 16-bit values.
+    __local uint dst[64];
+    __local ushort* dist = (__local ushort*)&dst[ 8 * 5 ];
+
+    int gid_0 = get_group_id(0);
+    int gid_1 = 0;
+
+    for( int i = 0; i < iterations; i++, gid_1++ )
+    {
+        int2 srcCoord = 0;
+        int2 refCoord = 0;
+
+        srcCoord.x = gid_0 * 16 + get_global_offset(0);
+        srcCoord.y = gid_1 * 16 + get_global_offset(1);
+
+        short2 predMV = 0;
+
+        #ifndef HW_NULL_CHECK
+        if( predMVs != NULL )
+        #endif
+        {
+            predMV = predMVs[ gid_0 + gid_1 * get_num_groups(0) ];
+            // NOTE(review): the division by 4 suggests the predictors are in
+            // quarter-pixel units - confirm against the extension spec.
+            refCoord.x = predMV.x / 4;
+            refCoord.y = predMV.y / 4;
+            // Keeps only bits 1..15 of y (clears bit 0 and everything above bit 15).
+            refCoord.y = refCoord.y & 0xFFFE;
+        }
+
+        intel_work_group_vme_mb_query( dst, srcCoord, refCoord, srcImage, refImage, vmeAccelerator );
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        // Write Out Result
+
+        // 4x4
+        // Block type 0x2: all 16 work-items write one vector each, arranged as a
+        // 4x4 grid inside the 16x16 block.
+        if( intel_get_accelerator_mb_block_type( vmeAccelerator ) == 0x2 )
+        {
+            int x = get_local_id(0) % 4;
+            int y = get_local_id(0) / 4;
+            int index =
+                ( gid_0 * 4 + x ) +
+                ( gid_1 * 4 + y ) * get_num_groups(0) * 4;
+
+            short2 val = as_short2( dst[ 8 + ( y * 4 + x ) * 2 ] );
+            outMVs[ index ] = val;
+
+            #ifndef HW_NULL_CHECK
+            if( outDist != NULL )
+            #endif
+            {
+                outDist[ index ] = dist[ y * 4 + x ];
+            }
+        }
+
+        // 8x8
+        // Block type 0x1: only the first four work-items write, as a 2x2 grid.
+        if( intel_get_accelerator_mb_block_type( vmeAccelerator ) == 0x1 )
+        {
+            if( get_local_id(0) < 4 )
+            {
+                int x = get_local_id(0) % 2;
+                int y = get_local_id(0) / 2;
+                int index =
+                    ( gid_0 * 2 + x ) +
+                    ( gid_1 * 2 + y ) * get_num_groups(0) * 2;
+                short2 val = as_short2( dst[ 8 + ( y * 2 + x ) * 8 ] );
+                outMVs[ index ] = val;
+
+                #ifndef HW_NULL_CHECK
+                if( outDist != NULL )
+                #endif
+                {
+                    outDist[ index ] = dist[ ( y * 2 + x ) * 4 ];
+                }
+            }
+        }
+
+        // 16x16
+        // Block type 0x0: a single vector per block, written by work-item 0.
+        if( intel_get_accelerator_mb_block_type( vmeAccelerator ) == 0x0 )
+        {
+            if( get_local_id(0) == 0 )
+            {
+                int index =
+                    gid_0 +
+                    gid_1 * get_num_groups(0);
+
+                short2 val = as_short2( dst[8] );
+                outMVs[ index ] = val;
+
+                #ifndef HW_NULL_CHECK
+                if( outDist != NULL )
+                #endif
+                {
+                    outDist[ index ] = dist[ 0 ];
+                }
+            }
+        }
+    }
+}
+
+#endif
\ No newline at end of file
diff --git a/Kernels/precompiled_kernels.cl b/Kernels/precompiled_kernels.cl
new file mode 100644
index 00000000..b9f039bc
--- /dev/null
+++ b/Kernels/precompiled_kernels.cl
@@ -0,0 +1,297 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+*/
+
+#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
+
+// Byte-at-a-time buffer copy: the slowest variant, but it has no alignment
+// requirements on either offset.
+//
+// Any local work size is acceptable.
+// Launch with a global work size of at least bytesToRead; work-items whose
+// global id is bytesToRead or larger do nothing.
+
+__kernel void CopyBufferBytes(
+    const __global uchar* pSrc,
+    __global uchar* pDst,
+    uint srcOffsetInBytes,
+    uint dstOffsetInBytes,
+    uint bytesToRead )
+{
+    const uint gid = get_global_id(0);
+    const uint byteCount = bytesToRead / sizeof(uchar);
+
+    // Surplus work-items past the end of the requested range have nothing to do.
+    if( gid >= byteCount )
+    {
+        return;
+    }
+
+    pDst[ dstOffsetInBytes + gid ] = pSrc[ srcOffsetInBytes + gid ];
+}
+
+// This is a faster kernel but it only works when the source
+// offset and dst offset are multiples of sizeof(uint).
+//
+// This kernel can work with any local work size.
+// The global work size should be at least ceil( bytesToRead / sizeof(uint) ).
+
+// Copies bytesToRead bytes one uint per work-item.  The work-item whose index
+// equals the number of whole uints copies the remaining 0-3 tail bytes.
+__kernel void CopyBufferUInts(
+    const __global uint* pSrc,
+    __global uint* pDst,
+    uint srcOffsetInUInts,
+    uint dstOffsetInUInts,
+    uint bytesToRead )
+{
+    uint index = get_global_id(0);
+
+    pSrc += srcOffsetInUInts + index;
+    pDst += dstOffsetInUInts + index;
+
+    uint lastIndex = bytesToRead / sizeof(uint);
+
+    if( index < lastIndex )
+    {
+        pDst[ 0 ] = pSrc[ 0 ];
+    }
+    else if( index == lastIndex )
+    {
+        // uint* and uchar* are incompatible pointer types, so the byte views
+        // need explicit casts (the address space qualifier is kept).
+        const __global uchar* pByteSrc = (const __global uchar*)pSrc;
+        __global uchar* pByteDst = (__global uchar*)pDst;
+
+        uint bytesRemaining = bytesToRead % sizeof(uint);
+
+        while( bytesRemaining )
+        {
+            pByteDst[ 0 ] = pByteSrc[ 0 ];
+
+            bytesRemaining--;
+            pByteSrc++;
+            pByteDst++;
+        }
+    }
+}
+
+// This is a faster kernel but it only works when the source
+// offset and dst offset are multiples of sizeof(uint4).
+//
+// This kernel can work with any local work size.
+// The global work size should be at least ceil( bytesToRead / sizeof(uint4) ).
+//
+// One uint4 is copied per work-item; the work-item whose index equals the
+// number of whole uint4s copies the remaining 0-15 tail bytes.
+
+__kernel void CopyBufferUInt4s(
+    const __global uint4* pSrc,
+    __global uint4* pDst,
+    uint srcOffsetInUInt4s,
+    uint dstOffsetInUInt4s,
+    uint bytesToRead )
+{
+    uint index = get_global_id(0);
+
+    pSrc += srcOffsetInUInt4s + index;
+    pDst += dstOffsetInUInt4s + index;
+
+    uint lastIndex = bytesToRead / sizeof(uint4);
+
+    if( index < lastIndex )
+    {
+        pDst[ 0 ] = pSrc[ 0 ];
+    }
+    else if( index == lastIndex )
+    {
+        // uint4* and uchar* are incompatible pointer types, so the byte views
+        // need explicit casts (the address space qualifier is kept).
+        const __global uchar* pByteSrc = (const __global uchar*)pSrc;
+        __global uchar* pByteDst = (__global uchar*)pDst;
+
+        uint bytesRemaining = bytesToRead % sizeof(uint4);
+
+        while( bytesRemaining )
+        {
+            pByteDst[ 0 ] = pByteSrc[ 0 ];
+
+            bytesRemaining--;
+            pByteSrc++;
+            pByteDst++;
+        }
+    }
+}
+
+// This is an experimental kernel. It only works when the source
+// offset and dst offset are multiples of sizeof(uint16).
+//
+// This kernel can work with any local work size.
+// The global work size should be at least ceil( bytesToRead / sizeof(uint16) ).
+
+// Copies bytesToRead bytes one uint16 per work-item.  The work-item whose index
+// equals the number of whole uint16s copies the remaining 0-63 tail bytes.
+__kernel void CopyBufferUInt16s(
+    const __global uint16* pSrc,
+    __global uint16* pDst,
+    uint srcOffsetInUInt16s,
+    uint dstOffsetInUInt16s,
+    uint bytesToRead )
+{
+    uint index = get_global_id(0);
+
+    pSrc += srcOffsetInUInt16s + index;
+    pDst += dstOffsetInUInt16s + index;
+
+    uint lastIndex = bytesToRead / sizeof(uint16);
+
+    if( index < lastIndex )
+    {
+        pDst[ 0 ] = pSrc[ 0 ];
+    }
+    else if( index == lastIndex )
+    {
+        // uint16* and uchar* are incompatible pointer types, so the byte views
+        // need explicit casts (the address space qualifier is kept).
+        const __global uchar* pByteSrc = (const __global uchar*)pSrc;
+        __global uchar* pByteDst = (__global uchar*)pDst;
+
+        uint bytesRemaining = bytesToRead % sizeof(uint16);
+
+        while( bytesRemaining )
+        {
+            pByteDst[ 0 ] = pByteSrc[ 0 ];
+
+            bytesRemaining--;
+            pByteSrc++;
+            pByteDst++;
+        }
+    }
+}
+
+#if __IMAGE_SUPPORT__
+
+// Technically, this is probably required, but we can probably avoid it
+// on 99% of GPU devices.
+#define CHECK_IMAGE_BOUNDS()
+//#define CHECK_IMAGE_BOUNDS() if( x < regionX )
+
+// The three kernels below copy one texel per work-item from srcImage to
+// dstImage using unnormalized, clamp-to-edge, nearest sampling.  They differ
+// only in the channel data type (float / signed int / unsigned int).  The Z
+// origins and the region arguments are accepted but not read while
+// CHECK_IMAGE_BOUNDS() expands to nothing.
+
+__kernel void CopyImage2Dto2DFloat(
+    __read_only image2d_t srcImage,
+    __write_only image2d_t dstImage,
+    uint srcOriginX,
+    uint srcOriginY,
+    uint srcOriginZ,
+    uint dstOriginX,
+    uint dstOriginY,
+    uint dstOriginZ,
+    uint regionX,
+    uint regionY,
+    uint regionZ )
+{
+    const sampler_t samplerInline =
+        CLK_NORMALIZED_COORDS_FALSE |
+        CLK_ADDRESS_CLAMP_TO_EDGE |
+        CLK_FILTER_NEAREST;
+
+    uint x = get_global_id(0);
+    uint y = get_global_id(1);
+
+    CHECK_IMAGE_BOUNDS()
+    {
+        uint srcX = x + srcOriginX;
+        uint srcY = y + srcOriginY;
+
+        uint dstX = x + dstOriginX;
+        uint dstY = y + dstOriginY;
+
+        float4 color = read_imagef( srcImage, samplerInline, (int2)( srcX, srcY ) );
+
+        write_imagef( dstImage, (int2)( dstX, dstY ), color );
+    }
+}
+
+__kernel void CopyImage2Dto2DInt(
+    __read_only image2d_t srcImage,
+    __write_only image2d_t dstImage,
+    uint srcOriginX,
+    uint srcOriginY,
+    uint srcOriginZ,
+    uint dstOriginX,
+    uint dstOriginY,
+    uint dstOriginZ,
+    uint regionX,
+    uint regionY,
+    uint regionZ )
+{
+    const sampler_t samplerInline =
+        CLK_NORMALIZED_COORDS_FALSE |
+        CLK_ADDRESS_CLAMP_TO_EDGE |
+        CLK_FILTER_NEAREST;
+
+    uint x = get_global_id(0);
+    uint y = get_global_id(1);
+
+    CHECK_IMAGE_BOUNDS()
+    {
+        uint srcX = x + srcOriginX;
+        uint srcY = y + srcOriginY;
+
+        uint dstX = x + dstOriginX;
+        uint dstY = y + dstOriginY;
+
+        int4 color = read_imagei( srcImage, samplerInline, (int2)( srcX, srcY ) );
+
+        write_imagei( dstImage, (int2)( dstX, dstY ), color );
+    }
+}
+
+__kernel void CopyImage2Dto2DUInt(
+    __read_only image2d_t srcImage,
+    __write_only image2d_t dstImage,
+    uint srcOriginX,
+    uint srcOriginY,
+    uint srcOriginZ,
+    uint dstOriginX,
+    uint dstOriginY,
+    uint dstOriginZ,
+    uint regionX,
+    uint regionY,
+    uint regionZ )
+{
+    const sampler_t samplerInline =
+        CLK_NORMALIZED_COORDS_FALSE |
+        CLK_ADDRESS_CLAMP_TO_EDGE |
+        CLK_FILTER_NEAREST;
+
+    uint x = get_global_id(0);
+    uint y = get_global_id(1);
+
+    CHECK_IMAGE_BOUNDS()
+    {
+        uint srcX = x + srcOriginX;
+        uint srcY = y + srcOriginY;
+
+        uint dstX = x + dstOriginX;
+        uint dstY = y + dstOriginY;
+
+        uint4 color = read_imageui( srcImage, samplerInline, (int2)( srcX, srcY ) );
+
+        write_imageui( dstImage, (int2)( dstX, dstY ), color );
+    }
+}
+
+#endif // __IMAGE_SUPPORT__
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..b685f825
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 Intel Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/OS/OS.h b/OS/OS.h new file mode 100644 index 00000000..27221600 --- /dev/null +++ b/OS/OS.h @@ -0,0 +1,33 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#pragma once + +#if defined(_WIN32) +#include "OS_windows.h" +#elif defined(__linux__) +#include "OS_linux.h" +#elif defined(__APPLE__) +#include "OS_mac.h" +#else +#error Unknown OS! 
+#endif diff --git a/OS/OS_linux.cpp b/OS/OS_linux.cpp new file mode 100644 index 00000000..0f20d29e --- /dev/null +++ b/OS/OS_linux.cpp @@ -0,0 +1,36 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/
+
+#include "OS_linux.h"
+
+namespace OS
+{
+
+// Linux Services constructor.  The argument is accepted for interface
+// compatibility and is not used here; real setup happens in Init().
+Services::Services( void* poGlobalData )
+{
+}
+
+// Nothing to release at this level; the base class destructor runs afterwards.
+Services::~Services()
+{
+}
+
+}
diff --git a/OS/OS_linux.h b/OS/OS_linux.h
new file mode 100644
index 00000000..0acac699
--- /dev/null
+++ b/OS/OS_linux.h
@@ -0,0 +1,134 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+*/ + +#pragma once + +#include "OS_linux_common.h" + +#include "CL/cl.h" // for clGetPlatformIDs + +namespace OS +{ + +class Services : public Services_Common +{ +public: + Services( void* pGlobalData ); + ~Services(); + + bool Init(); + + bool GetCLInterceptName( + std::string& name ) const; + + bool GetPrecompiledKernelString( + const char*& str, + size_t& length ) const; + + bool GetBuiltinKernelString( + const char*& str, + size_t& length ) const; + + bool ExecuteCommand( + const std::string& filename ) const; + bool StartAubCapture( + const std::string& fileName, + uint64_t delay ) const; + bool StopAubCapture( + uint64_t delay ) const; + +private: + DISALLOW_COPY_AND_ASSIGN( Services ); +}; + +inline bool Services::Init() +{ + return Services_Common::Init(); +} + +inline bool Services::GetCLInterceptName( + std::string& name ) const +{ + Dl_info info; + if( dladdr( (void*)clGetPlatformIDs, &info ) ) + { + name = info.dli_fname; + } + return false; +} + +#ifndef __ANDROID__ +extern "C" char _binary_Kernels_precompiled_kernels_cl_start; +extern "C" char _binary_Kernels_precompiled_kernels_cl_end; +#endif + +inline bool Services::GetPrecompiledKernelString( + const char*& str, + size_t& length ) const +{ +#ifndef __ANDROID__ + str = &_binary_Kernels_precompiled_kernels_cl_start; + length = &_binary_Kernels_precompiled_kernels_cl_end - &_binary_Kernels_precompiled_kernels_cl_start; +#endif + + return true; +} + +#ifndef __ANDROID__ +extern "C" char _binary_Kernels_builtin_kernels_cl_start; +extern "C" char _binary_Kernels_builtin_kernels_cl_end; +#endif + +inline bool Services::GetBuiltinKernelString( + const char*& str, + size_t& length ) const +{ +#ifndef __ANDROID__ + str = &_binary_Kernels_builtin_kernels_cl_start; + length = &_binary_Kernels_builtin_kernels_cl_end - &_binary_Kernels_builtin_kernels_cl_start; +#endif + + return true; +} + +inline bool Services::ExecuteCommand( const std::string& command ) const +{ + int res = system( command.c_str() ); + 
return res != -1; +} + +// TODO + +inline bool Services::StartAubCapture( + const std::string& fileName, + uint64_t delay ) const +{ + return false; +} + +inline bool Services::StopAubCapture( + uint64_t delay ) const +{ + return false; +} + +} diff --git a/OS/OS_linux_common.cpp b/OS/OS_linux_common.cpp new file mode 100644 index 00000000..5971b4fb --- /dev/null +++ b/OS/OS_linux_common.cpp @@ -0,0 +1,151 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#include "OS_linux_common.h" +#ifdef __ANDROID__ +#include +#include +#include +#endif + +namespace OS +{ + +const char* Services_Common::ENV_PREFIX = ""; +const char* Services_Common::CONFIG_FILE = "config.conf"; +const char* Services_Common::LOG_DIR = NULL; + +Services_Common::Services_Common() +{ +} + +Services_Common::~Services_Common() +{ + pthread_mutex_destroy( &m_CriticalSection ); +} + +bool Services_Common::ReadRegistry( + const std::string& name, + void* pValue, + size_t size ) const +{ + // Look at environment variables first: + { + std::string envName(ENV_PREFIX); + envName += name; + const char *envVal = getenv(envName.c_str()); + if( ( envVal != NULL ) && ( size == sizeof(unsigned int) ) ) + { + unsigned int *puVal = (unsigned int *)pValue; + *puVal = atoi(envVal); + return true; + } + else if( ( envVal != NULL ) && ( strlen(envVal) < size ) ) + { + char* pStr = (char*)pValue; + strcpy( pStr, envVal ); + return true; + } + } + + // Look at the config file second: + bool found = false; + + std::ifstream is; + std::string s; + + std::string configFile; + + const char *envVal = getenv("HOME"); +#ifdef __ANDROID__ + // if ho HOME on Android then use sdcard folder + if( envVal == NULL ) + { + configFile = "/sdcard"; + } + else + { + configFile = envVal; + } +#else + configFile = envVal; +#endif + configFile += "/"; + configFile += CONFIG_FILE; + + is.open( configFile.c_str() ); + if( is.fail() ) + { +#ifdef __ANDROID__ + __android_log_print( ANDROID_LOG_WARN, "clIntercept", "Failed to open config file: %s\n", configFile.c_str() ); +#endif + return false; + } + + while( !is.eof() && !found ) + { + std::getline(is, s); + + // skip blank lines + if( s.length() == 0 ) + { + continue; + } + // skip "comment" lines + if( s.find(";") == 0 || s.find("#") == 0 || s.find("//") == 0 ) + { + continue; + } + + size_t pos = s.find('='); + if( pos != std::string::npos ) + { + std::string var = s.substr( 0, pos ); + var.erase(std::remove_if(var.begin(), var.end(), 
::isspace), var.end()); + + std::string value = s.substr( pos + 1 ); + value.erase(std::remove_if(value.begin(), value.end(), ::isspace), value.end()); + + if( var == name ) + { + if( size == sizeof(unsigned int) ) + { + unsigned int* pUIValue = (unsigned int*)pValue; + std::istringstream iss(value); + iss >> pUIValue[0]; + found = true; + } + else if( value.length() < size ) + { + char* pStr = (char*)pValue; + strcpy( pStr, value.c_str() ); + found = true; + } + } + } + } + + is.close(); + return found; +} + +} diff --git a/OS/OS_linux_common.h b/OS/OS_linux_common.h new file mode 100644 index 00000000..d05b0460 --- /dev/null +++ b/OS/OS_linux_common.h @@ -0,0 +1,306 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#pragma once + +#include "OS_timer.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef __ANDROID__ +#include +#endif + +/*****************************************************************************\ + +MACRO: + DISALLOW_COPY_AND_ASSIGN + +Description: + A macro to disallow the copy constructor and operator= functions + This should be used in the private: declarations for a class + +\*****************************************************************************/ +#if !defined(DISALLOW_COPY_AND_ASSIGN) +#define DISALLOW_COPY_AND_ASSIGN( TypeName ) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) +#endif + +namespace OS +{ + +class Services_Common +{ +public: + static const char* ENV_PREFIX; + static const char* CONFIG_FILE; + static const char* LOG_DIR; + + Services_Common(); + ~Services_Common(); + + bool Init(); + + void EnterCriticalSection(); + void LeaveCriticalSection(); + + uint64_t GetProcessID() const; + uint64_t GetThreadID() const; + + std::string GetProcessName() const; + + bool ReadRegistry( + const std::string& name, + void* pValue, + size_t size ) const; + + void OutputDebugString( + const std::string& str ) const; + + uint64_t GetTimer() const; + uint64_t TickToNS( + uint64_t delta ) const; + + void* LoadLibrary( + const std::string& libraryName ) const; + void UnloadLibrary( + void*& pLibrary ) const; + + void* GetFunctionPointer( + void* pLibrary, + const std::string& functionName ) const; + + void GetDumpDirectoryName( + const std::string& subDir, + std::string& directoryName ) const; + void GetDumpDirectoryNameWithoutProcessName( + const std::string& subDir, + std::string& directoryName) const; + void MakeDumpDirectories( + const std::string& fileName ) const; + +private: + Timer m_Timer; + pthread_mutex_t m_CriticalSection; + + DISALLOW_COPY_AND_ASSIGN( Services_Common ); +}; + +inline bool Services_Common::Init() +{ + if( pthread_mutex_init( + 
&m_CriticalSection, + NULL ) ) + { + return false; + } + + return true; +} + +inline void Services_Common::EnterCriticalSection() +{ + pthread_mutex_lock( &m_CriticalSection ); +} + +inline void Services_Common::LeaveCriticalSection() +{ + pthread_mutex_unlock( &m_CriticalSection ); +} + +inline uint64_t Services_Common::GetProcessID() const +{ + return getpid(); +} + +inline uint64_t Services_Common::GetThreadID() const +{ + // TODO: Is this the thread ID we should be returning? + return pthread_self(); +} + +inline std::string Services_Common::GetProcessName() const +{ + char processName[ 1024 ]; + char* pProcessName = processName; + + size_t bytes = readlink( + "/proc/self/exe", + processName, + sizeof( processName ) - 1 ); + if( bytes ) + { + processName[ bytes ] = '\0'; + + pProcessName = strrchr( processName, '/' ); + pProcessName++; + } + else + { + strncpy( processName, "process.exe", sizeof( processName ) ); + processName[ sizeof( processName ) - 1 ] = 0; + } + + return std::string(pProcessName); +} + +inline void Services_Common::OutputDebugString( + const std::string& str ) const +{ + syslog( LOG_USER | LOG_INFO, "%s", str.c_str() ); +} + +inline uint64_t Services_Common::GetTimer() const +{ + return m_Timer.GetTimer(); +} + +inline uint64_t Services_Common::TickToNS( + uint64_t delta ) const +{ + return m_Timer.TickToNS( delta ); +} + +inline void* Services_Common::LoadLibrary( + const std::string& libraryName ) const +{ + void* pLibrary = dlopen( libraryName.c_str(), RTLD_NOW | RTLD_GLOBAL ); + if( pLibrary == NULL ) + { + fprintf(stderr, "dlopen() error: %s\n", dlerror()); + } + return pLibrary; +} + +inline void Services_Common::UnloadLibrary( + void*& pLibrary ) const +{ + dlclose( pLibrary ); + pLibrary = NULL; +} + +inline void* Services_Common::GetFunctionPointer( + void* pLibrary, + const std::string& functionName ) const +{ + if( pLibrary ) + { + return dlsym( pLibrary, functionName.c_str() ); + } + else + { + return dlsym( RTLD_NEXT, 
functionName.c_str() ); + } +} + +inline void Services_Common::GetDumpDirectoryName( + const std::string& subDir, + std::string& directoryName ) const +{ + // Get the home directory and add our directory name. + if( LOG_DIR ) + { + // Return log dir override if set in regkeys + directoryName = LOG_DIR; + } + else + { + { +#ifndef __ANDROID__ + directoryName = getenv("HOME"); +#else + const char *envVal = getenv("HOME"); + if( envVal == NULL ) + { + directoryName = "/sdcard/Intel"; + } + else + { + directoryName = envVal; + } +#endif + directoryName += "/"; + directoryName += subDir; + directoryName += "/"; + } + // Add the process name to the directory name. + directoryName += GetProcessName(); + } + +#ifdef __ANDROID__ + __android_log_print(ANDROID_LOG_INFO, "clIntercept", "dumpDir=%s\n", directoryName.c_str()); +#endif +} + +inline void Services_Common::GetDumpDirectoryNameWithoutProcessName( + const std::string& subDir, + std::string& directoryName) const +{ + // Get the home directory and add our directory name. + if( LOG_DIR ) + { + // Return log dir override if set in regkeys + directoryName = LOG_DIR; + } + else + { + directoryName = getenv("HOME"); + directoryName += "/"; + directoryName += subDir; + directoryName += "/"; + } +#ifdef __ANDROID__ + __android_log_print(ANDROID_LOG_INFO, "clIntercept", "dumpDir=%s\n", directoryName.c_str()); +#endif +} + +inline void Services_Common::MakeDumpDirectories( + const std::string& fileName ) const +{ + // The first directory name is the root. We don't + // have to make a directory for it. 
+ std::string::size_type pos = fileName.find( "/" ); + + pos = fileName.find( "/", ++pos ); + while( pos != std::string::npos ) + { + mkdir( + fileName.substr( 0, pos ).c_str(), + 0777 ); + + pos = fileName.find( "/", ++pos ); + } +} + +} diff --git a/OS/OS_mac.cpp b/OS/OS_mac.cpp new file mode 100644 index 00000000..028e27bc --- /dev/null +++ b/OS/OS_mac.cpp @@ -0,0 +1,36 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/
+
+#include "OS_mac.h"
+
+namespace OS
+{
+
+// The global-data argument is accepted for signature parity with the other
+// platforms but is not used here.
+Services::Services( void* poGlobalData )
+{
+}
+
+// Nothing to release at this level.
+Services::~Services()
+{
+}
+
+}
diff --git a/OS/OS_mac.h b/OS/OS_mac.h
new file mode 100644
index 00000000..8d574187
--- /dev/null
+++ b/OS/OS_mac.h
@@ -0,0 +1,114 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+*/ +#pragma once + +#include "OS_mac_common.h" + +void CLIntercept_Load(void); + +namespace OS +{ + +class Services : public Services_Common +{ +public: + Services( void* pGlobalData ); + ~Services(); + + bool Init(); + + bool GetCLInterceptName( + std::string& name ) const; + + bool GetPrecompiledKernelString( + const char*& str, + size_t& length ) const; + + bool GetBuiltinKernelString( + const char*& str, + size_t& length ) const; + + bool ExecuteCommand( + const std::string& filename ) const; + bool StartAubCapture( + const std::string& fileName, + uint64_t delay ) const; + bool StopAubCapture( + uint64_t delay ) const; + +private: + DISALLOW_COPY_AND_ASSIGN( Services ); +}; + +inline bool Services::Init() +{ + return Services_Common::Init(); +} + +inline bool Services::GetCLInterceptName( + std::string& name ) const +{ + Dl_info info; + if( dladdr( (void*)CLIntercept_Load, &info ) ) + { + name = info.dli_fname; + } + return false; +} + +// TODO: We currently don't support any of the kernels overrides on OSX. 
+ +inline bool Services::GetPrecompiledKernelString( + const char*& str, + size_t& length ) const +{ + return false; +} + +inline bool Services::GetBuiltinKernelString( + const char*& str, + size_t& length ) const +{ + return false; +} + +// TODO + +inline bool Services::ExecuteCommand( const std::string& command ) const +{ + return false; +} + +inline bool Services::StartAubCapture( + const std::string& fileName, + uint64_t delay ) const +{ + return false; +} + +inline bool Services::StopAubCapture( + uint64_t delay ) const +{ + return false; +} + +} diff --git a/OS/OS_mac_common.cpp b/OS/OS_mac_common.cpp new file mode 100644 index 00000000..ab848173 --- /dev/null +++ b/OS/OS_mac_common.cpp @@ -0,0 +1,130 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#include "OS_mac_common.h" + +namespace OS +{ + +const char* Services_Common::ENV_PREFIX = ""; +const char* Services_Common::CONFIG_FILE = "config.conf"; +const char* Services_Common::LOG_DIR = NULL; + +Services_Common::Services_Common() +{ +} + +Services_Common::~Services_Common() +{ + pthread_mutex_destroy( &m_CriticalSection ); +} + +bool Services_Common::ReadRegistry( + const std::string& name, + void* pValue, + size_t size ) const +{ + // Look at environment variables first: + { + std::string envName(ENV_PREFIX); + envName += name; + const char *envVal = getenv(envName.c_str()); + if( ( envVal != NULL ) && ( size == sizeof(unsigned int) ) ) + { + unsigned int *puVal = (unsigned int *)pValue; + *puVal = atoi(envVal); + return true; + } + else if( ( envVal != NULL ) && ( strlen(envVal) < size ) ) + { + char* pStr = (char*)pValue; + strcpy( pStr, envVal ); + return true; + } + } + + // Look at the config file second: + bool found = false; + + std::ifstream is; + std::string s; + + std::string configFile; + + configFile = getenv("HOME"); + configFile += "/"; + configFile += CONFIG_FILE; + + is.open( configFile.c_str() ); + if( is.fail() ) + { + return false; + } + + while( !is.eof() && !found ) + { + std::getline(is, s); + + // skip blank lines + if( s.length() == 0 ) + { + continue; + } + // skip "comment" lines + if( s.find(";") == 0 || s.find("#") == 0 || s.find("//") == 0 ) + { + continue; + } + + size_t pos = s.find('='); + if( pos != std::string::npos ) + { + std::string var = s.substr( 0, pos ); + var.erase(remove_if(var.begin(), var.end(), ::isspace), var.end()); + + std::string value = s.substr( pos + 1 ); + value.erase(remove_if(value.begin(), value.end(), ::isspace), value.end()); + + if( var == name ) + { + if( size == sizeof(unsigned int) ) + { + unsigned int* pUIValue = (unsigned int*)pValue; + std::istringstream iss(value); + iss >> pUIValue[0]; + found = true; + } + else if( value.length() < size ) + { + char* pStr = (char*)pValue; + strcpy( 
pStr, value.c_str() ); + found = true; + } + } + } + } + + is.close(); + return found; +} + +} diff --git a/OS/OS_mac_common.h b/OS/OS_mac_common.h new file mode 100644 index 00000000..9ae62c7d --- /dev/null +++ b/OS/OS_mac_common.h @@ -0,0 +1,251 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/*****************************************************************************\ + +MACRO: + DISALLOW_COPY_AND_ASSIGN + +Description: + A macro to disallow the copy constructor and operator= functions + This should be used in the private: declarations for a class + +\*****************************************************************************/ +#if !defined(DISALLOW_COPY_AND_ASSIGN) +#define DISALLOW_COPY_AND_ASSIGN( TypeName ) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) +#endif + +namespace OS +{ + +class Services_Common +{ +public: + static const char* ENV_PREFIX; + static const char* CONFIG_FILE; + static const char* LOG_DIR; + + Services_Common(); + ~Services_Common(); + + bool Init(); + + void EnterCriticalSection(); + void LeaveCriticalSection(); + + uint64_t GetThreadID() const; + + bool ReadRegistry( + const std::string& name, + void* pValue, + size_t size ) const; + + void OutputDebugString( + const std::string& str ) const; + + uint64_t GetTimer() const; + uint64_t TickToNS( + uint64_t delta ) const; + + void* LoadLibrary( + const std::string& libraryName ) const; + void UnloadLibrary( + void*& pLibrary ) const; + + void* GetFunctionPointer( + void* pLibrary, + const std::string& functionName ) const; + + void GetDumpDirectoryName( + const std::string& subDir, + std::string& directoryName ) const; + void GetDumpDirectoryNameWithoutProcessName( + const std::string& subDir, + std::string& directoryName) const; + void MakeDumpDirectories( + const std::string& fileName ) const; + +private: + pthread_mutex_t m_CriticalSection; + + DISALLOW_COPY_AND_ASSIGN( Services_Common ); +}; + +inline bool Services_Common::Init() +{ + if( pthread_mutex_init( + &m_CriticalSection, + NULL ) ) + { + return false; + } + + return true; +} + +inline void Services_Common::EnterCriticalSection() +{ + 
pthread_mutex_lock( &m_CriticalSection ); +} + +inline void Services_Common::LeaveCriticalSection() +{ + pthread_mutex_unlock( &m_CriticalSection ); +} + +inline uint64_t Services_Common::GetThreadID() const +{ + // TODO + return 0; +} + +inline void Services_Common::OutputDebugString( + const std::string& str ) const +{ + syslog( LOG_USER | LOG_INFO, "%s", str.c_str() ); +} + +inline uint64_t Services_Common::GetTimer() const +{ + timeval i; + gettimeofday( &i, NULL ); + return i.tv_sec * 1000000 + i.tv_usec; +} + +inline uint64_t Services_Common::TickToNS( + uint64_t delta ) const +{ + double ns = delta * 1000.0; + return (uint64_t)ns; +} + +inline void* Services_Common::LoadLibrary( + const std::string& libraryName ) const +{ + void* pLibrary = dlopen( libraryName.c_str(), RTLD_NOW ); + return pLibrary; +} + +inline void Services_Common::UnloadLibrary( + void*& pLibrary ) const +{ + dlclose( pLibrary ); + pLibrary = NULL; +} + +inline void* Services_Common::GetFunctionPointer( + void* pLibrary, + const std::string& functionName ) const +{ + if( pLibrary ) + { + return dlsym( pLibrary, functionName.c_str() ); + } + else + { + return dlsym( RTLD_NEXT, functionName.c_str() ); + } +} + +inline void Services_Common::GetDumpDirectoryName( + const std::string& subDir, + std::string& directoryName ) const +{ + // Get the home directory and add our directory name. + { + directoryName = getenv("HOME"); + directoryName += "/"; + directoryName += subDir; + directoryName += "/"; + } + // Add the process name to the directory name. 
+ { + char processName[ 1024 ]; + char* pProcessName = processName; + + pid_t pid = getpid(); + int ret = proc_pidpath( pid, processName, sizeof(processName) ); + if( ret > 0 ) + { + pProcessName = strrchr( processName, '/' ); + } + else + { + strncpy( processName, "process.exe", sizeof( processName ) ); + processName[ sizeof( processName ) - 1 ] = 0; + } + + directoryName += pProcessName; + } +} + +inline void Services_Common::GetDumpDirectoryNameWithoutProcessName( + const std::string& subDir, + std::string& directoryName) const +{ + // Get the home directory and add our directory name. + { + directoryName = getenv("HOME"); + directoryName += "/"; + directoryName += subDir; + directoryName += "/"; + } +} + +inline void Services_Common::MakeDumpDirectories( + const std::string& fileName ) const +{ + // The first directory name is the root. We don't + // have to make a directory for it. + std::string::size_type pos = fileName.find( "/" ); + + pos = fileName.find( "/", ++pos ); + while( pos != std::string::npos ) + { + mkdir( + fileName.substr( 0, pos ).c_str(), + 0777 ); + + pos = fileName.find( "/", ++pos ); + } +} + +} diff --git a/OS/OS_mac_interpose.h b/OS/OS_mac_interpose.h new file mode 100644 index 00000000..49a08c74 --- /dev/null +++ b/OS/OS_mac_interpose.h @@ -0,0 +1,171 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+*/
+
+#pragma once
+
+// Emits one { replacement, replacee } pointer pair into the
+// "__DATA,__interpose" section, pairing CLIRN(_funcname) with the original
+// _funcname.  __attribute__((used)) keeps the otherwise unreferenced static
+// object from being discarded.
+// NOTE(review): presumably dyld reads this section to redirect calls to the
+// replacement - confirm against the dyld interposing documentation.
+// CLIRN is defined outside this header.
+#define CLINTERCEPT_DYLD_INTERPOSE(_funcname) \
+    __attribute__((used)) static const struct \
+    { \
+        const void* replacement; \
+        const void* replacee; \
+    } \
+    _interpose_ ## _funcname __attribute__ ((section ("__DATA,__interpose"))) = \
+    { \
+        (const void*)(unsigned long)&CLIRN( _funcname ), \
+        (const void*)(unsigned long)&_funcname \
+    };
+
+// One interpose record per intercepted OpenCL entry point.
+CLINTERCEPT_DYLD_INTERPOSE(clGetPlatformIDs);
+CLINTERCEPT_DYLD_INTERPOSE(clGetPlatformInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clGetDeviceIDs);
+CLINTERCEPT_DYLD_INTERPOSE(clGetDeviceInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateContext);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateContextFromType);
+CLINTERCEPT_DYLD_INTERPOSE(clRetainContext);
+CLINTERCEPT_DYLD_INTERPOSE(clReleaseContext);
+CLINTERCEPT_DYLD_INTERPOSE(clGetContextInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateCommandQueue);
+CLINTERCEPT_DYLD_INTERPOSE(clRetainCommandQueue);
+CLINTERCEPT_DYLD_INTERPOSE(clReleaseCommandQueue);
+CLINTERCEPT_DYLD_INTERPOSE(clGetCommandQueueInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clSetCommandQueueProperty);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateBuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateImage2D);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateImage3D);
+CLINTERCEPT_DYLD_INTERPOSE(clRetainMemObject);
+CLINTERCEPT_DYLD_INTERPOSE(clReleaseMemObject);
+CLINTERCEPT_DYLD_INTERPOSE(clGetSupportedImageFormats);
+CLINTERCEPT_DYLD_INTERPOSE(clGetMemObjectInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clGetImageInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateSampler);
+CLINTERCEPT_DYLD_INTERPOSE(clRetainSampler);
+CLINTERCEPT_DYLD_INTERPOSE(clReleaseSampler);
+CLINTERCEPT_DYLD_INTERPOSE(clGetSamplerInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateProgramWithSource);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateProgramWithBinary);
+CLINTERCEPT_DYLD_INTERPOSE(clRetainProgram);
+CLINTERCEPT_DYLD_INTERPOSE(clReleaseProgram);
+CLINTERCEPT_DYLD_INTERPOSE(clBuildProgram);
+CLINTERCEPT_DYLD_INTERPOSE(clUnloadCompiler);
+CLINTERCEPT_DYLD_INTERPOSE(clGetProgramInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clGetProgramBuildInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateKernel);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateKernelsInProgram);
+CLINTERCEPT_DYLD_INTERPOSE(clRetainKernel);
+CLINTERCEPT_DYLD_INTERPOSE(clReleaseKernel);
+CLINTERCEPT_DYLD_INTERPOSE(clSetKernelArg);
+CLINTERCEPT_DYLD_INTERPOSE(clGetKernelInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clGetKernelWorkGroupInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clWaitForEvents);
+CLINTERCEPT_DYLD_INTERPOSE(clGetEventInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clRetainEvent);
+CLINTERCEPT_DYLD_INTERPOSE(clReleaseEvent);
+CLINTERCEPT_DYLD_INTERPOSE(clGetEventProfilingInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clFlush);
+CLINTERCEPT_DYLD_INTERPOSE(clFinish);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueReadBuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueWriteBuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueCopyBuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueReadImage);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueWriteImage);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueCopyImage);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueCopyImageToBuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueCopyBufferToImage);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueMapBuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueMapImage);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueUnmapMemObject);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueNDRangeKernel);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueTask);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueNativeKernel);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueMarker);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueWaitForEvents);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueBarrier);
+
+// Optional features?
+CLINTERCEPT_DYLD_INTERPOSE(clGetExtensionFunctionAddress);
+CLINTERCEPT_DYLD_INTERPOSE(clGetExtensionFunctionAddressForPlatform);
+
+// OpenCL 1.1 Entry Points (optional)
+CLINTERCEPT_DYLD_INTERPOSE(clCreateSubBuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clSetMemObjectDestructorCallback);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateUserEvent);
+CLINTERCEPT_DYLD_INTERPOSE(clSetUserEventStatus);
+CLINTERCEPT_DYLD_INTERPOSE(clSetEventCallback);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueReadBufferRect);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueWriteBufferRect);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueCopyBufferRect);
+
+// OpenCL 1.2 Entry Points (optional)
+CLINTERCEPT_DYLD_INTERPOSE(clCompileProgram);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateFromGLTexture);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateImage);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateProgramWithBuiltInKernels);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateSubDevices);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueBarrierWithWaitList);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueFillBuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueFillImage);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueMarkerWithWaitList);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueMigrateMemObjects);
+CLINTERCEPT_DYLD_INTERPOSE(clGetKernelArgInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clLinkProgram);
+CLINTERCEPT_DYLD_INTERPOSE(clReleaseDevice);
+CLINTERCEPT_DYLD_INTERPOSE(clRetainDevice);
+CLINTERCEPT_DYLD_INTERPOSE(clUnloadPlatformCompiler);
+
+// OpenCL 2.0 Entry Points (optional)
+#if 0
+// Disabled for now, until Apple supports OpenCL 2.0.
+CLINTERCEPT_DYLD_INTERPOSE(clSVMAlloc);
+CLINTERCEPT_DYLD_INTERPOSE(clSVMFree);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueSVMFree);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueSVMMemcpy);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueSVMMemFill);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueSVMMap);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueSVMUnmap);
+CLINTERCEPT_DYLD_INTERPOSE(clSetKernelArgSVMPointer);
+CLINTERCEPT_DYLD_INTERPOSE(clSetKernelExecInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clCreatePipe);
+CLINTERCEPT_DYLD_INTERPOSE(clGetPipeInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateCommandQueueWithProperties);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateSamplerWithProperties);
+#endif
+
+// OpenCL 2.1 Entry Points (optional)
+#if 0
+// Disabled for now, until Apple supports OpenCL 2.1.
+CLINTERCEPT_DYLD_INTERPOSE(clSetDefaultDeviceCommandQueue);
+CLINTERCEPT_DYLD_INTERPOSE(clGetDeviceAndHostTimer);
+CLINTERCEPT_DYLD_INTERPOSE(clGetHostTimer);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateProgramWithIL);
+CLINTERCEPT_DYLD_INTERPOSE(clCloneKernel);
+CLINTERCEPT_DYLD_INTERPOSE(clGetKernelSubGroupInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueSVMMigrateMem);
+#endif
+
+// CL-GL Entry Points (optional)
+CLINTERCEPT_DYLD_INTERPOSE(clCreateFromGLBuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateFromGLTexture2D);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateFromGLTexture3D);
+CLINTERCEPT_DYLD_INTERPOSE(clCreateFromGLRenderbuffer);
+CLINTERCEPT_DYLD_INTERPOSE(clGetGLObjectInfo);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueAcquireGLObjects);
+CLINTERCEPT_DYLD_INTERPOSE(clEnqueueReleaseGLObjects);
diff --git a/OS/OS_timer.h b/OS/OS_timer.h
new file mode 100644
index 00000000..fac6dc9a
--- /dev/null
+++ b/OS/OS_timer.h
@@ -0,0 +1,125 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#pragma once + +#if defined(_WIN32) +#include +// Visual Studio 2008 doesn't support stdint.h, but +// Visual Studio 2010 does. When CLIntercept stops +// supporting Visual Studio 2008 we can remove this +// typedef and include stdint.h instead. 
+typedef unsigned __int64 uint64_t; +#elif defined(__linux__) +#include +#include +#include +#endif + +/*****************************************************************************\ + +MACRO: + DISALLOW_COPY_AND_ASSIGN + +Description: + A macro to disallow the copy constructor and operator= functions + This should be used in the private: declarations for a class + +\*****************************************************************************/ +#if !defined(DISALLOW_COPY_AND_ASSIGN) +#define DISALLOW_COPY_AND_ASSIGN( TypeName ) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) +#endif + +namespace OS +{ + +class Timer +{ +public: + Timer() {}; + ~Timer() {}; + + bool Init( void ); + + uint64_t GetTimer( void ) const; + uint64_t TickToNS( uint64_t delta ) const; + +private: +#if defined(_WIN32) + LARGE_INTEGER m_Freq; +#endif + + DISALLOW_COPY_AND_ASSIGN( Timer ); +}; + +inline bool Timer::Init( void ) +{ +#if defined(_WIN32) + if( ::QueryPerformanceFrequency( &m_Freq ) == FALSE ) + { + return false; + } +#endif + return true; +} + +inline uint64_t Timer::GetTimer( void ) const +{ +#if defined(_WIN32) + LARGE_INTEGER i; + ::QueryPerformanceCounter( &i ); + return (uint64_t)i.QuadPart; +#elif defined(__linux__) +#ifdef USE_OLD_TIMER + timeval i; + gettimeofday( &i, NULL ); + return i.tv_sec * 1000000 + i.tv_usec; +#else + struct timespec t; + clock_gettime( CLOCK_MONOTONIC, &t ); + return t.tv_sec * 1000000000 + t.tv_nsec; +#endif +#else +#error Need to implement Timer::GetTimer! +#endif +} + +inline uint64_t Timer::TickToNS( uint64_t delta ) const +{ +#if defined(_WIN32) + double ns = delta * ( 1000000000.0 / m_Freq.QuadPart ); + return (uint64_t)ns; +#elif defined(__linux__) +#ifdef USE_OLD_TIMER + double ns = delta * 1000.0; + return (uint64_t)ns; +#else + return delta; +#endif +#else +#error Need to implement Timer::TickToNS! 
+#endif +} + +} diff --git a/OS/OS_windows.cpp b/OS/OS_windows.cpp new file mode 100644 index 00000000..711a9785 --- /dev/null +++ b/OS/OS_windows.cpp @@ -0,0 +1,37 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#include "OS_windows.h" + +namespace OS +{ + +Services::Services( void* pGlobalData ) +{ + m_hInstance = (HINSTANCE)pGlobalData; +} + +Services::~Services() +{ +} + +} \ No newline at end of file diff --git a/OS/OS_windows.h b/OS/OS_windows.h new file mode 100644 index 00000000..34712b22 --- /dev/null +++ b/OS/OS_windows.h @@ -0,0 +1,192 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#pragma once + +#include "OS_windows_common.h" + +#include "resource/clIntercept_resource.h" + +namespace OS +{ + +class Services : public Services_Common +{ +public: + Services( void* pGlobalData ); + ~Services(); + + bool Init(); + + bool GetCLInterceptName( + std::string& name ) const; + + bool GetPrecompiledKernelString( + const char*& str, + size_t& length ) const; + + bool GetBuiltinKernelString( + const char*& str, + size_t& length ) const; + + bool ExecuteCommand( + const std::string& filename ) const; + bool StartAubCapture( + const std::string& fileName, + uint64_t delay ) const; + bool StopAubCapture( + uint64_t delay ) const; + +private: + HINSTANCE m_hInstance; + + DISALLOW_COPY_AND_ASSIGN( Services ); +}; + +inline bool Services::Init() +{ + if( m_hInstance == NULL ) + { + return false; + } + + return Services_Common::Init(); +} + +inline bool Services::GetCLInterceptName( + std::string& name ) const +{ + char dllName[ MAX_PATH ]; + + if( GetModuleFileNameA( m_hInstance, dllName, MAX_PATH - 1 ) ) + { + name = dllName; + return true; + } + + return false; +} + +inline bool Services::GetPrecompiledKernelString( + const char*& str, + size_t& length ) const +{ + bool success = false; + + HRSRC hrsrc = ::FindResource( + m_hInstance, + MAKEINTRESOURCE(IDR_TEXT_PRECOMPILED_KERNELS), + "TEXT" ); + + if( hrsrc != NULL ) + { + length = ::SizeofResource( + m_hInstance, + hrsrc ); + + HGLOBAL hres = ::LoadResource( + m_hInstance, + hrsrc ); + if( hres != NULL ) + { + void* pVoid = ::LockResource( hres ); + if( pVoid ) + { + str = (const char*)pVoid; + success = true; + } + } + } + + return success; +} + +inline bool Services::GetBuiltinKernelString( + const char*& str, + size_t& length ) const +{ + bool success = false; + + HRSRC hrsrc = ::FindResource( + m_hInstance, + MAKEINTRESOURCE(IDR_TEXT_BUILTIN_KERNELS), + "TEXT" ); + + if( hrsrc != NULL ) + { + length = ::SizeofResource( + m_hInstance, + hrsrc ); + + HGLOBAL hres = ::LoadResource( + m_hInstance, + 
hrsrc ); + if( hres != NULL ) + { + void* pVoid = ::LockResource( hres ); + if( pVoid ) + { + str = (const char*)pVoid; + success = true; + } + } + } + + return success; +} + +inline bool Services::ExecuteCommand( const std::string& command ) const +{ + int res = system( command.c_str() ); + return res != -1; +} + +inline bool Services::StartAubCapture( + const std::string& fileName, + uint64_t delay ) const +{ + if( delay ) + { + Sleep( (DWORD)delay ); + } + + std::string command = "kdc.exe " + fileName; + int res = system(command.c_str()); + //fprintf(stderr, "Running the command: %s returned %d\n", command.c_str(), res ); + return res != -1; +} + +inline bool Services::StopAubCapture( + uint64_t delay ) const +{ + if( delay ) + { + Sleep( (DWORD)delay ); + } + + std::string command = "kdc.exe -off"; + int res = system(command.c_str()); + //fprintf(stderr, "Running the command: %s returned %d\n", command.c_str(), res ); + return res != -1; +} + +} \ No newline at end of file diff --git a/OS/OS_windows_common.cpp b/OS/OS_windows_common.cpp new file mode 100644 index 00000000..0d2b723a --- /dev/null +++ b/OS/OS_windows_common.cpp @@ -0,0 +1,41 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+*/
+
+#include "OS_windows_common.h"
+
+namespace OS
+{
+
+// Prefix prepended to a control name to form the environment variable that
+// ReadRegistry() checks first.
+const char* Services_Common::ENV_PREFIX = "";
+// Registry subkey that ReadRegistry() opens under HKEY_CURRENT_USER and then
+// HKEY_LOCAL_MACHINE.
+const char* Services_Common::REGISTRY_KEY = "SOFTWARE\\INTEL\\IGFX";
+// When non-NULL, the dump directory getters return this path unchanged.
+const char* Services_Common::LOG_DIR = NULL;
+
+// Construction does no work; the timer and critical section are set up by Init().
+Services_Common::Services_Common()
+{
+}
+
+// NOTE(review): m_CriticalSection is initialized in Init(), not in the
+// constructor, so destroying an object whose Init() was never called (or
+// failed) deletes an uninitialized critical section - confirm that every
+// instance is successfully initialized before it is destroyed.
+Services_Common::~Services_Common()
+{
+    DeleteCriticalSection( &m_CriticalSection );
+}
+
+}
\ No newline at end of file
diff --git a/OS/OS_windows_common.h b/OS/OS_windows_common.h
new file mode 100644
index 00000000..af6b8c0c
--- /dev/null
+++ b/OS/OS_windows_common.h
@@ -0,0 +1,379 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#pragma once + +#include "OS_timer.h" + +#include +#include + +/*****************************************************************************\ + +MACRO: + DISALLOW_COPY_AND_ASSIGN + +Description: + A macro to disallow the copy constructor and operator= functions + This should be used in the private: declarations for a class + +\*****************************************************************************/ +#if !defined(DISALLOW_COPY_AND_ASSIGN) +#define DISALLOW_COPY_AND_ASSIGN( TypeName ) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) +#endif + +// Visual Studio 2008 doesn't support stdint.h, but +// Visual Studio 2010 does. When CLIntercept stops +// supporting Visual Studio 2008 we can remove this +// typedef and include stdint.h instead. 
+typedef unsigned __int64 uint64_t; + +namespace OS +{ + +class Services_Common +{ +public: + static const char* ENV_PREFIX; + static const char* REGISTRY_KEY; + static const char* LOG_DIR; + + Services_Common(); + ~Services_Common(); + + bool Init(); + + void EnterCriticalSection(); + void LeaveCriticalSection(); + + uint64_t GetProcessID() const; + uint64_t GetThreadID() const; + + std::string GetProcessName() const; + + bool ReadRegistry( + const std::string& name, + void* pValue, + size_t size ) const; + + void OutputDebugString( + const std::string& str ) const; + + uint64_t GetTimer() const; + uint64_t TickToNS( + uint64_t delta ) const; + + void* LoadLibrary( + const std::string& libraryName ) const; + void UnloadLibrary( + void*& pLibrary ) const; + + void* GetFunctionPointer( + void* pLibrary, + const std::string& functionName ) const; + + void GetDumpDirectoryName( + const std::string& subDir, + std::string& directoryName ) const; + void GetDumpDirectoryNameWithoutProcessName( + const std::string& subDir, + std::string& directoryName) const; + void MakeDumpDirectories( + const std::string& fileName ) const; + +private: + Timer m_Timer; + CRITICAL_SECTION m_CriticalSection; + + DISALLOW_COPY_AND_ASSIGN( Services_Common ); +}; + +inline bool Services_Common::Init() +{ + if( m_Timer.Init() == false ) + { + return false; + } + + if( ::InitializeCriticalSectionAndSpinCount( + &m_CriticalSection, + 0x400 ) == FALSE ) + { + return false; + } + + return true; +} + +inline void Services_Common::EnterCriticalSection() +{ + ::EnterCriticalSection( &m_CriticalSection ); +} + +inline void Services_Common::LeaveCriticalSection() +{ + ::LeaveCriticalSection( &m_CriticalSection ); +} + +inline uint64_t Services_Common::GetProcessID() const +{ + return GetCurrentProcessId(); +} + +inline uint64_t Services_Common::GetThreadID() const +{ + return GetCurrentThreadId(); +} + +inline std::string Services_Common::GetProcessName() const +{ + char processName[ MAX_PATH ]; + char* 
pProcessName = processName; + + if( GetModuleFileNameA( NULL, processName, MAX_PATH - 1 ) ) + { + pProcessName = strrchr( processName, '\\' ); + pProcessName++; + } + else + { + strcpy_s( processName, MAX_PATH, "process.exe" ); + } + + return std::string(pProcessName); +} + +inline bool Services_Common::ReadRegistry( + const std::string& name, + void* pValue, + size_t size ) const +{ + // Look at environment variables first: + { + std::string envName(ENV_PREFIX); + envName += name; + + char* envVal = NULL; + size_t len = 0; + errno_t err = _dupenv_s( &envVal, &len, envName.c_str() ); + if( !err ) + { + if( ( envVal != NULL ) && ( size == sizeof(unsigned int) ) ) + { + unsigned int *puVal = (unsigned int *)pValue; + *puVal = atoi(envVal); + return true; + } + else if( ( envVal != NULL ) && ( strlen(envVal) < size ) ) + { + char* pStr = (char*)pValue; + strcpy_s( pStr, size, envVal ); + return true; + } + free( envVal ); + } + } + + LONG success = ERROR_SUCCESS; + HKEY cliKey; + + // Try HKEY_CURRENT_USER first. + + success = ::RegOpenKeyEx( + HKEY_CURRENT_USER, + REGISTRY_KEY, + 0, + KEY_READ, + &cliKey ); + if( ERROR_SUCCESS == success ) + { + DWORD dwSize = (DWORD)size; + + success = ::RegQueryValueEx( + cliKey, + name.c_str(), + NULL, + NULL, + (LPBYTE)pValue, + &dwSize ); + + ::RegCloseKey( cliKey ); + } + + // Only try HKEY_LOCAL_MACHINE if we didn't find the + // control in HKEY_CURRENT_USER. This way we maintain + // backwards compatibility with existing installations + // of CLIntercept, but controls in HKEY_CURRENT_USER + // "win". 
+ + if( ERROR_SUCCESS != success ) + { + success = ::RegOpenKeyEx( + HKEY_LOCAL_MACHINE, + REGISTRY_KEY, + 0, + KEY_READ, + &cliKey ); + if( ERROR_SUCCESS == success ) + { + DWORD dwSize = (DWORD)size; + + success = ::RegQueryValueEx( + cliKey, + name.c_str(), + NULL, + NULL, + (LPBYTE)pValue, + &dwSize ); + + ::RegCloseKey( cliKey ); + } + } + + return ( ERROR_SUCCESS == success ); +} + +inline void Services_Common::OutputDebugString( + const std::string& str ) const +{ + ::OutputDebugString( str.c_str() ); +} + +inline uint64_t Services_Common::GetTimer() const +{ + return m_Timer.GetTimer(); +} + +inline uint64_t Services_Common::TickToNS( + uint64_t delta ) const +{ + return m_Timer.TickToNS( delta ); +} + +inline void* Services_Common::LoadLibrary( + const std::string& libraryName ) const +{ + HMODULE hModule = ::LoadLibraryA( libraryName.c_str() ); + return hModule; +} + +inline void Services_Common::UnloadLibrary( + void*& pLibrary ) const +{ + HMODULE hModule = (HMODULE)pLibrary; + ::FreeLibrary( hModule ); + pLibrary = NULL; +} + +inline void* Services_Common::GetFunctionPointer( + void* pLibrary, + const std::string& functionName ) const +{ + if( pLibrary ) + { + HMODULE hModule = (HMODULE)pLibrary; + return ::GetProcAddress( hModule, functionName.c_str() ); + } + else + { + return NULL; + } +} + +inline void Services_Common::GetDumpDirectoryName( + const std::string& subDir, + std::string& directoryName ) const +{ + // Return log dir override if set in regkeys + if( LOG_DIR ) + { + directoryName = LOG_DIR; + return; + } + + // Get the system root and add our directory name. + { + char* systemDrive = NULL; + size_t length = 0; + + _dupenv_s( &systemDrive, &length, "SystemDrive" ); + + directoryName = systemDrive; + directoryName += "/Intel/"; + directoryName += subDir; + directoryName += "/"; + + free( systemDrive ); + } + + // Add the process name to the directory name. 
+ directoryName += GetProcessName(); +} + +inline void Services_Common::GetDumpDirectoryNameWithoutProcessName( + const std::string& subDir, + std::string& directoryName) const +{ + // Return log dir override if set in regkeys + if( LOG_DIR ) + { + directoryName = LOG_DIR; + return; + } + + // Get the system root and add our directory name. + { + char* systemDrive = NULL; + size_t length = 0; + + _dupenv_s(&systemDrive, &length, "SystemDrive"); + + directoryName = systemDrive; + directoryName += "/Intel/"; + directoryName += subDir; + directoryName += "/"; + + free(systemDrive); + } +} + +inline void Services_Common::MakeDumpDirectories( + const std::string& fileName ) const +{ + // The first directory name is the root. We don't + // have to make a directory for it. + std::string::size_type pos = fileName.find( "/" ); + + pos = fileName.find( "/", ++pos ); + while( pos != std::string::npos ) + { + CreateDirectoryA( + fileName.substr( 0, pos ).c_str(), + NULL ); + + pos = fileName.find( "/", ++pos ); + } +} + +} diff --git a/README.md b/README.md new file mode 100644 index 00000000..2d69769e --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +# Intercept Layer for OpenCL™ Applications + +The Intercept Layer for OpenCL Applications is a tool that can intercept +and modify OpenCL calls for debugging and performance analysis. Using the +Intercept Layer for OpenCL Applications requires no application or driver +modifications. + +To operate, the Intercept Layer for OpenCL Applications masquerades as the +OpenCL ICD loader (usually) or as an OpenCL implementation (rarely) and is +loaded when the application intends to load the real OpenCL ICD loader. As +part of the Intercept Layer for OpenCL Application's initialization, it loads +the real OpenCL ICD loader and gets function pointers to the real OpenCL +entry points. Then, whenever the application makes an OpenCL call, the call +is intercepted and can be passed through to the real OpenCL with or without +changes. 
+ +This project adheres to the Intercept Layer for OpenCL Application's +[code of conduct](CODE_OF_CONDUCT.md). By participating, you are expected to +uphold this code. + +## Documentation + +All controls are documented [here](docs/controls.md). + +Instructions to build the Intercept Layer for OpenCL Applications can be found [here](docs/build.md). + +Instructions to install the Intercept Layer for OpenCL Applications can be found [here](docs/install.md). + +Detailed instructions: +* [How to Inject Modified Programs](docs/injecting_programs.md) +* [How to Use the Intercept Layer for OpenCL Applications with VTune](docs/vtune_logging.md) +* [How to Use the Intercept Layer for OpenCL Applications with Chrome](docs/chrome_tracing.md) + +## License + +The Intercept Layer for OpenCL Applications is licensed under the [MIT License](LICENSE). + +Notes: + +* These files are partially generated and hence do not include license file headers, however + they are also licensed under the MIT License: + - [resource/clIntercept.rc](resource/clIntercept.rc) + - [resource/clIntercept_resource.h](resource/clIntercept_resource.h) + - [config/CLIConfig.rc](config/CLIConfig.rc) + - [config/resource.h](config/resource.h) + +### Attached Licenses + +The Intercept Layer for OpenCL Applications uses third-party code licensed under the following licenses: + +* These files are licensed under the [Khronos(tm) License][khronos_cl_license]: + - [CL/cl.h](CL/cl.h) + - [CL/cl_gl.h](CL/cl_gl.h) + - [CL/cl_platform.h](CL/cl_platform.h) + - [GL/glcorearb.h](GL/glcorearb.h) +* These files are licensed under the [Boost Software License - Version 1.0][boost_license]: + - [cmake_modules/GetGitRevisionDescription.cmake](cmake_modules/GetGitRevisionDescription.cmake) + - [cmake_modules/GetGitRevisionDescription.cmake.in](cmake_modules/GetGitRevisionDescription.cmake.in) + +## Support + +Please file a GitHub issue to report an issue or ask questions. 
Private or
+sensitive issues may be submitted via email to this project's maintainer
+(Ben Ashbaugh - ben 'dot' ashbaugh 'at' intel 'dot' com), or to any other
+Intel GitHub maintainer (see profile for email address).
+
+## How to Contribute
+
+Contributions to the Intercept Layer for OpenCL Applications are welcomed and
+encouraged. Please see [CONTRIBUTING](CONTRIBUTING.md) for details on how to
+contribute to the project.
+
+---
+
+OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos.
+
+\* Other names and brands may be claimed as the property of others.
+
+Copyright (c) 2018, Intel(R) Corporation
+
+[khronos_cl_license]: https://github.com/KhronosGroup/OpenCL-Headers/blob/master/LICENSE
+[boost_license]: http://www.boost.org/LICENSE_1_0.txt
diff --git a/Src/clIntercept.def b/Src/clIntercept.def
new file mode 100644
index 00000000..f801b775
--- /dev/null
+++ b/Src/clIntercept.def
@@ -0,0 +1,140 @@
+; Copyright (c) 2018 Intel Corporation
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy
+; of this software and associated documentation files (the "Software"), to deal
+; in the Software without restriction, including without limitation the rights
+; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+; copies of the Software, and to permit persons to whom the Software is
+; furnished to do so, subject to the following conditions:
+;
+; The above copyright notice and this permission notice shall be included in all
+; copies or substantial portions of the Software.
+;
+; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +; SOFTWARE. + +LIBRARY opencl.dll +EXPORTS + clBuildProgram + clCloneKernel + clCompileProgram + clCreateBuffer + clCreateCommandQueue + clCreateCommandQueueWithProperties + clCreateContext + clCreateContextFromType + clCreateFromGLBuffer + clCreateFromGLRenderbuffer + clCreateFromGLTexture + clCreateFromGLTexture2D + clCreateFromGLTexture3D + clCreateImage + clCreateImage2D + clCreateImage3D + clCreateKernel + clCreateKernelsInProgram + clCreatePipe + clCreateProgramWithBinary + clCreateProgramWithBuiltInKernels + clCreateProgramWithIL + clCreateProgramWithSource + clCreateSampler + clCreateSamplerWithProperties + clCreateSubBuffer + clCreateSubDevices + clCreateUserEvent + clEnqueueAcquireGLObjects + clEnqueueBarrier + clEnqueueBarrierWithWaitList + clEnqueueCopyBuffer + clEnqueueCopyBufferRect + clEnqueueCopyBufferToImage + clEnqueueCopyImage + clEnqueueCopyImageToBuffer + clEnqueueFillBuffer + clEnqueueFillImage + clEnqueueMapBuffer + clEnqueueMapImage + clEnqueueMarker + clEnqueueMarkerWithWaitList + clEnqueueMigrateMemObjects + clEnqueueNDRangeKernel + clEnqueueNativeKernel + clEnqueueReadBuffer + clEnqueueReadBufferRect + clEnqueueReadImage + clEnqueueReleaseGLObjects + clEnqueueSVMFree + clEnqueueSVMMap + clEnqueueSVMMemcpy + clEnqueueSVMMemFill + clEnqueueSVMMigrateMem + clEnqueueSVMUnmap + clEnqueueTask + clEnqueueUnmapMemObject + clEnqueueWaitForEvents + clEnqueueWriteBuffer + clEnqueueWriteBufferRect + clEnqueueWriteImage + clFinish + clFlush + clGetCommandQueueInfo + clGetContextInfo + clGetDeviceAndHostTimer + clGetDeviceIDs + clGetDeviceInfo + clGetEventInfo + clGetEventProfilingInfo + clGetExtensionFunctionAddress + clGetExtensionFunctionAddressForPlatform + clGetGLObjectInfo + 
clGetGLTextureInfo + clGetHostTimer + clGetImageInfo + clGetKernelArgInfo + clGetKernelInfo + clGetKernelSubGroupInfo + clGetKernelWorkGroupInfo + clGetMemObjectInfo + clGetPipeInfo + clGetPlatformIDs + clGetPlatformInfo + clGetProgramBuildInfo + clGetProgramInfo + clGetSamplerInfo + clGetSupportedImageFormats + clLinkProgram + clReleaseCommandQueue + clReleaseContext + clReleaseDevice + clReleaseEvent + clReleaseKernel + clReleaseMemObject + clReleaseProgram + clReleaseSampler + clRetainCommandQueue + clRetainContext + clRetainDevice + clRetainEvent + clRetainKernel + clRetainMemObject + clRetainProgram + clRetainSampler + clSetCommandQueueProperty + clSetDefaultDeviceCommandQueue + clSetEventCallback + clSetKernelArg + clSetKernelArgSVMPointer + clSetKernelExecInfo + clSetMemObjectDestructorCallback + clSetUserEventStatus + clSVMAlloc + clSVMFree + clUnloadCompiler + clUnloadPlatformCompiler + clWaitForEvents \ No newline at end of file diff --git a/Src/clIntercept.map b/Src/clIntercept.map new file mode 100644 index 00000000..515906f2 --- /dev/null +++ b/Src/clIntercept.map @@ -0,0 +1,175 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +INTERNAL { + global: + _binary_Kernels_builtin_kernels_cl_start; + _binary_Kernels_builtin_kernels_cl_end; + _binary_Kernels_precompiled_kernels_cl_start; + _binary_Kernels_precompiled_kernels_cl_end; + local: + *; +}; + +OPENCL_1.0 { + global: + clBuildProgram; + clCreateBuffer; + clCreateCommandQueue; + clCreateContext; + clCreateContextFromType; + clCreateFromGLBuffer; + clCreateFromGLRenderbuffer; + clCreateFromGLTexture2D; + clCreateFromGLTexture3D; + clCreateImage2D; + clCreateImage3D; + clCreateKernel; + clCreateKernelsInProgram; + clCreateProgramWithBinary; + clCreateProgramWithSource; + clCreateSampler; + clEnqueueAcquireGLObjects; + clEnqueueBarrier; + clEnqueueCopyBuffer; + clEnqueueCopyBufferToImage; + clEnqueueCopyImage; + clEnqueueCopyImageToBuffer; + clEnqueueMapBuffer; + clEnqueueMapImage; + clEnqueueMarker; + clEnqueueNDRangeKernel; + clEnqueueNativeKernel; + clEnqueueReadBuffer; + clEnqueueReadImage; + clEnqueueReleaseGLObjects; + clEnqueueTask; + clEnqueueUnmapMemObject; + clEnqueueWaitForEvents; + clEnqueueWriteBuffer; + clEnqueueWriteImage; + clFinish; + clFlush; + clGetCommandQueueInfo; + clGetContextInfo; + clGetDeviceIDs; + clGetDeviceInfo; + clGetEventInfo; + clGetEventProfilingInfo; + clGetExtensionFunctionAddress; + clGetGLObjectInfo; + clGetGLTextureInfo; + clGetImageInfo; + clGetKernelInfo; + clGetKernelWorkGroupInfo; + clGetMemObjectInfo; + clGetPlatformIDs; + clGetPlatformInfo; + clGetProgramBuildInfo; + clGetProgramInfo; + clGetSamplerInfo; + clGetSupportedImageFormats; + clReleaseCommandQueue; + clReleaseContext; + clReleaseEvent; + clReleaseKernel; + clReleaseMemObject; + clReleaseProgram; + clReleaseSampler; + clRetainCommandQueue; + clRetainContext; + 
clRetainEvent; + clRetainKernel; + clRetainMemObject; + clRetainProgram; + clRetainSampler; + clSetCommandQueueProperty; + clSetKernelArg; + clUnloadCompiler; + clWaitForEvents; +} INTERNAL; + +OPENCL_1.1 { + global: + clCreateSubBuffer; + clCreateUserEvent; + clEnqueueCopyBufferRect; + clEnqueueReadBufferRect; + clEnqueueWriteBufferRect; + clSetEventCallback; + clSetMemObjectDestructorCallback; + clSetUserEventStatus; +} OPENCL_1.0; + +OPENCL_1.2 { + global: + clCompileProgram; + clCreateFromGLTexture; + clCreateImage; + clCreateProgramWithBuiltInKernels; + clCreateSubDevices; + clEnqueueBarrierWithWaitList; + clEnqueueFillBuffer; + clEnqueueFillImage; + clEnqueueMarkerWithWaitList; + clEnqueueMigrateMemObjects; + clGetExtensionFunctionAddressForPlatform; + clGetKernelArgInfo; + clLinkProgram; + clReleaseDevice; + clRetainDevice; + clUnloadPlatformCompiler; +} OPENCL_1.1; + +OPENCL_2.0 { + global: + clCreateCommandQueueWithProperties; + clCreatePipe; + clCreateSamplerWithProperties; + clEnqueueSVMFree; + clEnqueueSVMMap; + clEnqueueSVMMemcpy; + clEnqueueSVMMemFill; + clEnqueueSVMUnmap; + clGetPipeInfo; + clSetKernelArgSVMPointer; + clSetKernelExecInfo; + clSVMAlloc; + clSVMFree; +} OPENCL_1.2; + +OPENCL_2.1 { + global: + clCloneKernel; + clCreateProgramWithIL; + clEnqueueSVMMigrateMem; + clGetDeviceAndHostTimer; + clGetHostTimer; + clGetKernelSubGroupInfo; + clSetDefaultDeviceCommandQueue; +} OPENCL_2.0; + +OPENCL_2.2 { + global: + clSetProgramReleaseCallback; + clSetProgramSpecializationConstant; +} OPENCL_2.1; diff --git a/Src/cli_ext.h b/Src/cli_ext.h new file mode 100644 index 00000000..ad5f708d --- /dev/null +++ b/Src/cli_ext.h @@ -0,0 +1,701 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#pragma once + +// cl_khr_gl_sharing +extern CL_API_ENTRY +cl_int CL_API_CALL clGetGLContextInfoKHR( + const cl_context_properties *properties, + cl_gl_context_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +// cl_khr_gl_event +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +// cl_khr_gl_event +extern CL_API_ENTRY +cl_event CL_API_CALL clCreateEventFromGLsyncKHR( + cl_context context, + cl_GLsync sync, + cl_int* errcode_ret); + +#if defined(_WIN32) + +// Minimal set of types for cl_khr_d3d10_sharing. +// Don't include cl_d3d10.h here because we don't want a dependency on d3d10.h. 
+typedef cl_uint cl_d3d10_device_source_khr;
+typedef cl_uint cl_d3d10_device_set_khr;
+// Forward declarations only: the prototypes below take these D3D10
+// interfaces by pointer, so their definitions are not needed here.
+class ID3D10Buffer;
+class ID3D10Texture2D;
+class ID3D10Texture3D;
+
+// cl_khr_d3d10_sharing
+extern CL_API_ENTRY
+cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR(
+            cl_platform_id platform,
+            cl_d3d10_device_source_khr d3d_device_source,
+            void* d3d_object,
+            cl_d3d10_device_set_khr d3d_device_set,
+            cl_uint num_entries,
+            cl_device_id* devices,
+            cl_uint* num_devices);
+
+// cl_khr_d3d10_sharing
+extern CL_API_ENTRY
+cl_mem CL_API_CALL clCreateFromD3D10BufferKHR(
+            cl_context context,
+            cl_mem_flags flags,
+            ID3D10Buffer* resource,
+            cl_int* errcode_ret);
+
+// cl_khr_d3d10_sharing
+extern CL_API_ENTRY
+cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR(
+            cl_context context,
+            cl_mem_flags flags,
+            ID3D10Texture2D* resource,
+            UINT subresource,
+            cl_int* errcode_ret);
+
+// cl_khr_d3d10_sharing
+extern CL_API_ENTRY
+cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR(
+            cl_context context,
+            cl_mem_flags flags,
+            ID3D10Texture3D* resource,
+            UINT subresource,
+            cl_int* errcode_ret);
+
+// cl_khr_d3d10_sharing
+extern CL_API_ENTRY
+cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR(
+            cl_command_queue command_queue,
+            cl_uint num_objects,
+            const cl_mem* mem_objects,
+            cl_uint num_events_in_wait_list,
+            const cl_event* event_wait_list,
+            cl_event* event);
+
+// cl_khr_d3d10_sharing
+extern CL_API_ENTRY
+cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR(
+            cl_command_queue command_queue,
+            cl_uint num_objects,
+            const cl_mem* mem_objects,
+            cl_uint num_events_in_wait_list,
+            const cl_event* event_wait_list,
+            cl_event* event);
+
+// Minimal set of types for cl_khr_d3d11_sharing.
+// Don't include cl_d3d11.h here because we don't want a dependency on d3d11.h.
+typedef cl_uint cl_d3d11_device_source_khr;
+typedef cl_uint cl_d3d11_device_set_khr;
+// Forward declarations only: the prototypes below take these D3D11
+// interfaces by pointer, so their definitions are not needed here.
+class ID3D11Buffer;
+class ID3D11Texture2D;
+class ID3D11Texture3D;
+
+// cl_khr_d3d11_sharing
+extern CL_API_ENTRY
+cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR(
+            cl_platform_id platform,
+            cl_d3d11_device_source_khr d3d_device_source,
+            void* d3d_object,
+            cl_d3d11_device_set_khr d3d_device_set,
+            cl_uint num_entries,
+            cl_device_id* devices,
+            cl_uint* num_devices);
+
+// cl_khr_d3d11_sharing
+extern CL_API_ENTRY
+cl_mem CL_API_CALL clCreateFromD3D11BufferKHR(
+            cl_context context,
+            cl_mem_flags flags,
+            ID3D11Buffer* resource,
+            cl_int* errcode_ret);
+
+// cl_khr_d3d11_sharing
+extern CL_API_ENTRY
+cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR(
+            cl_context context,
+            cl_mem_flags flags,
+            ID3D11Texture2D* resource,
+            UINT subresource,
+            cl_int* errcode_ret);
+
+// cl_khr_d3d11_sharing
+extern CL_API_ENTRY
+cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR(
+            cl_context context,
+            cl_mem_flags flags,
+            ID3D11Texture3D* resource,
+            UINT subresource,
+            cl_int* errcode_ret);
+
+// cl_khr_d3d11_sharing
+extern CL_API_ENTRY
+cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR(
+            cl_command_queue command_queue,
+            cl_uint num_objects,
+            const cl_mem* mem_objects,
+            cl_uint num_events_in_wait_list,
+            const cl_event* event_wait_list,
+            cl_event* event);
+
+// cl_khr_d3d11_sharing
+extern CL_API_ENTRY
+cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR(
+            cl_command_queue command_queue,
+            cl_uint num_objects,
+            const cl_mem* mem_objects,
+            cl_uint num_events_in_wait_list,
+            const cl_event* event_wait_list,
+            cl_event* event);
+
+// Minimal set of types for cl_khr_dx9_media_sharing.
+// Don't include cl_d3d9.h here because we don't want a dependency on d3d9.h.
+typedef cl_uint cl_dx9_media_adapter_type_khr; +typedef cl_uint cl_dx9_media_adapter_set_khr; +typedef cl_uint cl_dx9_media_adapter_type_khr; +class IDirect3DSurface9; + +// cl_khr_dx9_media_sharing +extern CL_API_ENTRY +cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( + cl_platform_id platform, + cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr* media_adapters_type, + void* media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices); + +// cl_khr_dx9_media_sharing +extern CL_API_ENTRY +cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( + cl_context context, + cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, + void* surface_info, + cl_uint plane, + cl_int* errcode_ret); + +// cl_khr_dx9_media_sharing +extern CL_API_ENTRY +cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +// cl_khr_dx9_media_sharing +extern CL_API_ENTRY +cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +// Minimal set of types for cl_intel_dx9_media_sharing. +// Don't include cl_d3d9.h here because we don't want a dependency on d3d9.h. 
+typedef cl_uint cl_dx9_device_source_intel; +typedef cl_uint cl_dx9_device_set_intel; +class IDirect3DSurface9; + +// cl_intel_dx9_media_sharing +extern CL_API_ENTRY +cl_int CL_API_CALL clGetDeviceIDsFromDX9INTEL( + cl_platform_id platform, + cl_dx9_device_source_intel d3d_device_source, + void *dx9_object, + cl_dx9_device_set_intel d3d_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices ); + +// cl_intel_dx9_media_sharing +extern CL_API_ENTRY +cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret ); + +// cl_intel_dx9_media_sharing +extern CL_API_ENTRY +cl_int CL_API_CALL clEnqueueAcquireDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + +// cl_intel_dx9_media_sharing +extern CL_API_ENTRY +cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + +#endif + +// cl_khr_il_program +#define CL_DEVICE_IL_VERSION_KHR 0x105B +#define CL_PROGRAM_IL_KHR 0x1169 +extern CL_API_ENTRY +cl_program CL_API_CALL clCreateProgramWithILKHR( + cl_context context, + const void* il, + size_t length, + cl_int* errcode_ret ); + +// cl_khr_subgroups +typedef cl_uint cl_kernel_sub_group_info; +extern CL_API_ENTRY +cl_int CL_API_CALL clGetKernelSubGroupInfoKHR( + cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +// cl_khr_create_command_queue +typedef cl_bitfield cl_queue_properties_khr; +extern CL_API_ENTRY +cl_command_queue CL_API_CALL 
clCreateCommandQueueWithPropertiesKHR( + cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret); + +// Unofficial MDAPI extension: +extern CL_API_ENTRY +cl_command_queue CL_API_CALL clCreatePerfCountersCommandQueueINTEL( + cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_uint configuration, + cl_int* errcode_ret); + +extern CL_API_ENTRY +cl_int CL_API_CALL clSetPerformanceConfigurationINTEL( + cl_device_id device, + cl_uint count, + cl_uint* offsets, + cl_uint* values ); + +// Unofficial kernel profiling extension: +#define CL_CONTEXT_KERNEL_PROFILING_MODES_COUNT_INTEL 0x407A +#define CL_CONTEXT_KERNEL_PROFILING_MODE_INFO_INTEL 0x407B +#define CL_KERNEL_IL_SYMBOLS_INTEL 0x407C +#define CL_KERNEL_BINARY_PROGRAM_INTEL 0x407D + +// Unofficial VTune Debug Info extension: +#define CL_PROGRAM_DEBUG_INFO_INTEL 0x4100 +#define CL_PROGRAM_DEBUG_INFO_SIZES_INTEL 0x4101 +#define CL_KERNEL_BINARIES_INTEL 0x4102 +#define CL_KERNEL_BINARY_SIZES_INTEL 0x4103 + +// VME + +typedef struct _cl_accelerator_intel* cl_accelerator_intel; +typedef cl_uint cl_accelerator_type_intel; +typedef cl_uint cl_accelerator_info_intel; + +// Error Codes +#define CL_INVALID_ACCELERATOR_INTEL -1094 +#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 +#define CL_INVALID_ACCELERATOR_DESC_INTEL -1096 +#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 + +// cl_device_info +#define CL_DEVICE_ME_VERSION_INTEL 0x407E +#define CL_DEVICE_TRANSFORM_MASK_MAX_WIDTH_INTEL 0x409C +#define CL_DEVICE_TRANSFORM_MASK_MAX_HEIGHT_INTEL 0x409D +#define CL_DEVICE_TRANSFORM_FILTER_MAX_WIDTH_INTEL 0x409E +#define CL_DEVICE_TRANSFORM_FILTER_MAX_HEIGHT_INTEL 0x409F + +// cl_accelerator_type_intel +#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 + +// cl_accelerator_info_intel +#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 +#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 +#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 
+#define CL_ACCELERATOR_TYPE_INTEL 0x4093 + +// cl_motion_estimation_desc_intel flags +#define CL_ME_MB_TYPE_16x16_INTEL 0x0 +#define CL_ME_MB_TYPE_8x8_INTEL 0x1 +#define CL_ME_MB_TYPE_4x4_INTEL 0x2 + +#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 + +#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 + +#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 +#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 +#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 + +#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 +#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 + +#define CL_ME_COST_PENALTY_NONE_INTEL 0x0 +#define CL_ME_COST_PENALTY_LOW_INTEL 0x1 +#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 +#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3 + +#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CL_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CL_ME_VERSION_LEGACY_INTEL 0x0 +#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 + +typedef struct _cl_motion_estimation_desc_intel { + cl_uint mb_block_type; + cl_uint subpixel_mode; + cl_uint sad_adjust_mode; + cl_uint search_path_type; +} cl_motion_estimation_desc_intel; + +extern CL_API_ENTRY +cl_accelerator_intel CL_API_CALL clCreateAcceleratorINTEL( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret ); + +extern CL_API_ENTRY +cl_int CL_API_CALL clGetAcceleratorInfoINTEL( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + +extern CL_API_ENTRY +cl_int CL_API_CALL clRetainAcceleratorINTEL( + cl_accelerator_intel accelerator ); + +extern CL_API_ENTRY +cl_int CL_API_CALL clReleaseAcceleratorINTEL( + cl_accelerator_intel accelerator ); + +// 
cl_intel_egl_image_yuv +#define CL_EGL_YUV_PLANE_INTEL 0x4107 + +// cl_intel_simultaneous_sharing +#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 +#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 + +// cl_intel_thread_local_exec +#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31) + +// cl_intel_va_api_media_sharing + +#define CL_VA_API_DISPLAY_INTEL 0x4094 +#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095 +#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096 +#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097 +#define CL_MEM_VA_API_SURFACE_INTEL 0x4098 +#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099 +#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A +#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B + +#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098 +#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099 +#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100 +#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101 + +// Minimal set of types for cl_intel_va_api_media_sharing. 
+typedef cl_uint cl_va_api_device_source_intel; +typedef cl_uint cl_va_api_device_set_intel; +struct VASurfaceID; + +extern CL_API_ENTRY +cl_int CL_API_CALL clGetDeviceIDsFromVA_APIMediaAdapterINTEL( + cl_platform_id platform, + cl_va_api_device_source_intel media_adapter_type, + void *media_adapter, + cl_va_api_device_set_intel media_adapter_set, + cl_uint num_entries, + cl_device_id *devices, + cl_uint *num_devices); + +extern CL_API_ENTRY +cl_mem CL_API_CALL clCreateFromVA_APIMediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + VASurfaceID *surface, + cl_uint plane, + cl_int *errcode_ret); + +extern CL_API_ENTRY +cl_int CL_API_CALL clEnqueueAcquireVA_APIMediaSurfacesINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem *mem_objects, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event); + +extern CL_API_ENTRY +cl_int CL_API_CALL clEnqueueReleaseVA_APIMediaSurfacesINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem *mem_objects, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event); + +// cl_intel_packed_yuv +#define CL_YUYV_INTEL 0x4076 +#define CL_UYVY_INTEL 0x4077 +#define CL_YVYU_INTEL 0x4078 +#define CL_VYUY_INTEL 0x4079 + +// cl_intel_planar_yuv + +// cl_channel_order +#define CL_NV12_INTEL 0x410E + +// cl_mem_flags +#define CL_MEM_NO_ACCESS_INTEL (1 << 24) +#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL (1 << 25) + +// cl_device_info +#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E +#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F + +// cl_intel_required_subgroup_size +#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 +#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 +#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A + +// cl_intel_driver_diagnostics +#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106 + +// cl_intelx_video_enhancement +// This is the base-functionality VEBox extension. 
+// Note: These are preview enum names and values! + +// cl_device_info +#define CL_DEVICE_VE_VERSION_INTEL 0x4160 +#define CL_DEVICE_VE_ENGINE_COUNT_INTEL 0x4161 + +// cl_queue_properties / cl_command_queue_info +#define CL_QUEUE_VE_ENABLE_INTEL 0x4162 + +// attribute_ids for cl_vebox_attrib_desc_intel +#define CL_VE_ACCELERATOR_ATTRIB_DENOISE_INTEL 0x4163 +#define CL_VE_ACCELERATOR_ATTRIB_DEINTERLACE_INTEL 0x4164 +#define CL_VE_ACCELERATOR_ATTRIB_HOT_PIXEL_CORR_INTEL 0x4165 + +// cl_accelerator_info_intel +#define CL_VE_ACCELERATOR_HISTOGRAMS_INTEL 0x4166 +#define CL_VE_ACCELERATOR_STATISTICS_INTEL 0x4167 +#define CL_VE_ACCELERATOR_STMM_INPUT_INTEL 0x4168 +#define CL_VE_ACCELERATOR_STMM_OUTPUT_INTEL 0x4169 + +// cl_intelx_ve_color_pipeline +// Note: These are preview enum names and values! + +// cl_device_info +#define CL_DEVICE_VE_COLOR_PIPE_VERSION_INTEL 0x416A + +// attribute_ids for cl_vebox_attrib_desc_intel +#define CL_VE_ACCELERATOR_ATTRIB_STD_STE_INTEL 0x416B +#define CL_VE_ACCELERATOR_ATTRIB_GAMUT_COMP_INTEL 0x416C +#define CL_VE_ACCELERATOR_ATTRIB_GECC_INTEL 0x416D +#define CL_VE_ACCELERATOR_ATTRIB_ACE_INTEL 0x416E +#define CL_VE_ACCELERATOR_ATTRIB_ACE_ADV_INTEL 0x416F +#define CL_VE_ACCELERATOR_ATTRIB_TCC_INTEL 0x4170 +#define CL_VE_ACCELERATOR_ATTRIB_PROC_AMP_INTEL 0x4171 +#define CL_VE_ACCELERATOR_ATTRIB_BACK_END_CSC_INTEL 0x4172 +#define CL_VE_ACCELERATOR_ATTRIB_AOI_ALPHA_INTEL 0x4173 +#define CL_VE_ACCELERATOR_ATTRIB_CCM_INTEL 0x4174 +#define CL_VE_ACCELERATOR_ATTRIB_FWD_GAMMA_CORRECT_INTEL 0x4175 +#define CL_VE_ACCELERATOR_ATTRIB_FRONT_END_CSC_INTEL 0x4176 + +// cl_intelx_ve_camera_pipeline +// Note, these are preview enum names and values! 
+ +// cl_device_info +#define CL_DEVICE_VE_CAMERA_PIPE_VERSION_INTEL 0x4177 + +// attribute_ids for cl_vebox_attrib_desc_intel +#define CL_VE_ACCELERATOR_ATTRIB_BLACK_LEVEL_CORR_INTEL 0x4178 +#define CL_VE_ACCELERATOR_ATTRIB_DEMOSAIC_INTEL 0x4179 +#define CL_VE_ACCELERATOR_ATTRIB_WHITE_BALANCE_CORR_INTEL 0x417A +#define CL_VE_ACCELERATOR_ATTRIB_VIGNETTE_INTEL 0x417B + +// HEVC PAK +// Note, this extension is still in development! + +// cl_device_info +#define CL_DEVICE_PAK_VERSION_INTEL 0x4180 +#define CL_DEVICE_PAK_AVAILABLE_CODECS_INTEL 0x4181 + +// cl_queue_properties / cl_command_queue_info +#define CL_QUEUE_PAK_ENABLE_INTEL 0x4189 + +// cl_accelerator_info_intel +#define CL_PAK_CTU_COUNT_INTEL 0x4182 +#define CL_PAK_CTU_WIDTH_INTEL 0x4183 +#define CL_PAK_CTU_HEIGHT_INTEL 0x4184 +#define CL_PAK_MAX_INTRA_DEPTH_INTEL 0x4185 +#define CL_PAK_MAX_INTER_DEPTH_INTEL 0x4186 +#define CL_PAK_NUM_CUS_PER_CTU_INTEL 0x4187 +#define CL_PAK_MV_BUFFER_SIZE_INTEL 0x4188 + +// Error Codes +// These are currently all mapped to CL_INVALID_VALUE. +// Need official error code assignment. 
+#define CL_INVALID_PAK_CTU_SIZE_INTEL CL_INVALID_VALUE +#define CL_INVALID_PAK_TU_SIZE_INTEL CL_INVALID_VALUE +#define CL_INVALID_PAK_TU_INTRA_DEPTH_INTEL CL_INVALID_VALUE +#define CL_INVALID_PAK_TU_INTER_DEPTH_INTEL CL_INVALID_VALUE +#define CL_INVALID_PAK_BITRATE_RANGE_INTEL CL_INVALID_VALUE +#define CL_INVALID_PAK_INSERTION_INTEL CL_INVALID_VALUE +#define CL_INVALID_PAK_CTU_POSITION_INTEL CL_INVALID_VALUE +#define CL_INVALID_PAK_REFERENCE_IMAGE_INDEX_INTEL CL_INVALID_VALUE + +// Altera Extensions: + +// cl_altera_device_temperature +#define CL_DEVICE_CORE_TEMPERATURE_ALTERA 0x40F3 + +// cl_altera_compiler_mode +#define CL_CONTEXT_COMPILER_MODE_ALTERA 0x40F0 +#define CL_CONTEXT_PROGRAM_EXE_LIBRARY_ROOT_ALTERA 0x40F1 +#define CL_CONTEXT_OFFLINE_DEVICE_ALTERA 0x40F2 + +// These are from the Khronos cl_ext.h: + +// cl_khr_icd +#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 +#define CL_PLATFORM_NOT_FOUND_KHR -1001 + +// cl_khr_initialize_memory +#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030 + +// cl_khr_terminate_context +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031 +#define CL_CONTEXT_TERMINATE_KHR 0x2032 + +// cl_khr_spir +#define CL_DEVICE_SPIR_VERSIONS 0x40E0 +#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 + +// cl_khr_subgroups +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034 + +// cl_nv_device_attribute_query +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 + +// cl_ext_atomic_counters +#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 + +// cl_amd_device_attribute_query +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 
+#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 +#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 +#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 +#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 +#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 +#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 +#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 +#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 +#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 +#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A +#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B +#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C + +// cl_amd_offline_devices +#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F + +// cl_ext_device_fission +#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 +#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 +#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 +#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 +#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 +#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 +#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 +#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 + +#define CL_DEVICE_PARTITION_FAILED_EXT -1057 +#define CL_INVALID_PARTITION_COUNT_EXT -1058 +#define CL_INVALID_PARTITION_NAME_EXT -1059 + +// cl_qcom_ext_host_ptr +#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) + +#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 +#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 +#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 +#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 +#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 +#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 +#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 +#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 + +// cl_qcom_ion_host_ptr +#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 + +// cl_arm_printf extension +#define CL_PRINTF_CALLBACK_ARM 0x40B0 +#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1 
diff --git a/Src/common.h b/Src/common.h new file mode 100644 index 00000000..86e42c6d --- /dev/null +++ b/Src/common.h @@ -0,0 +1,110 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#pragma once + +#define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS + +#if defined(__ANDROID__) +#include +#elif defined(_WIN32) || defined(__linux__) +#include "GL/glcorearb.h" +#elif defined(__APPLE__) +#include +#else +#error Unknown OS! +#endif + +// Note: This is purposefully including the CLIntercept version of cl.h +// and cl_gl.h, not the system header files. +#include "CL/cl.h" +#include "CL/cl_gl.h" + +#if defined(_WIN32) + #define CLI_DEBUG_BREAK() __debugbreak(); +#elif defined(__linux__) || defined(__APPLE__) + #include + #include + #define CLI_DEBUG_BREAK() raise(SIGTRAP); +#else + #error Unknown OS! 
+#endif + +#ifdef _DEBUG + #define CLI_ASSERT(x) \ + { \ + if (!(x)) \ + { \ + CLI_DEBUG_BREAK(); \ + } \ + } +#else + #define CLI_ASSERT(x) +#endif + +#if defined(_WIN32) || defined(__linux__) + #define CLIRN( _funcname ) _funcname +#elif defined(__APPLE__) + #define CLIRN( _funcname ) i ## _funcname +#else + #error Unknown OS! +#endif + +#if defined(_WIN32) + #define CLI_SPRINTF(_s, _sz, _f, ...) sprintf_s(_s, _TRUNCATE, _f, ##__VA_ARGS__) + #define CLI_VSPRINTF(_s, _sz, _f, _a) vsnprintf_s(_s, _TRUNCATE, _f, _a) + #define CLI_MEMCPY(_d, _dsz, _s, _sz) memcpy_s(_d, _dsz, _s, _sz) + #define CLI_STRCAT(_d, _dsz, _s) strcat_s(_d, _dsz, _s) + #define CLI_STRTOK(_s, _d, _c) strtok_s(_s, _d, _c) + #define CLI_C_ASSERT(e) typedef char __C_ASSERT__[(e)?1:-1] +#else + #if !defined(MAX_PATH) + #define MAX_PATH 256 + #endif + #define CLI_SPRINTF(_s, _sz, _f, ...) snprintf(_s, _sz, _f, ##__VA_ARGS__) + #define CLI_VSPRINTF(_s, _sz, _f, _a) vsnprintf(_s, _sz, _f, _a) + // TODO: Investigate how to reliably use memcpy_s on Linux: + #define CLI_MEMCPY(_d, _dsz, _s, _sz) memcpy(_d, _s, _sz) + #define CLI_STRCAT(_d, _dsz, _s) strcat(_d, _s) + #define CLI_STRTOK(_s, _d, _c) strtok_r(_s, _d, _c) + #define CLI_C_ASSERT(e) typedef char __attribute__((unused)) __C_ASSERT__[(e)?1:-1] +#endif + +#define CLI_MAX_STRING_SIZE 1024 + +/*****************************************************************************\ + +MACRO: + DISALLOW_COPY_AND_ASSIGN + +Description: + A macro to disallow the copy constructor and operator= functions + This should be used in the private: declarations for a class + +\*****************************************************************************/ +#if !defined(DISALLOW_COPY_AND_ASSIGN) +#define DISALLOW_COPY_AND_ASSIGN( TypeName ) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) +#endif diff --git a/Src/controls.h b/Src/controls.h new file mode 100644 index 00000000..6dba71de --- /dev/null +++ b/Src/controls.h @@ -0,0 +1,177 @@ +/* +// Copyright 
(c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#ifndef CLI_CONTROL +#error Must define CLI_CONTROL before including this file! +#endif + +#ifndef CLI_CONTROL_SEPARATOR +#define CLI_CONTROL_SEPARATOR( _name ) +#endif + +CLI_CONTROL_SEPARATOR( Logging Controls: ) +CLI_CONTROL( bool, AppendFiles, false, "By default, the Intercept Layer for OpenCL Applications log files will be created from scratch when the intercept DLL is loaded, and any Intercept Layer for OpenCL Applications report files will be created from scratch when the intercept DLL is unloaded. If AppendFiles is set to a nonzero value, the Intercept Layer for OpenCL Applications will append to an existing file instead of recreating it. This can be useful if an application loads and unloads the intercept DLL multiple times, or to simply preserve log or report data from run-to-run." 
) +CLI_CONTROL( bool, LogToFile, false, "If set to a nonzero value, sends log information to the file \"clintercept_log.txt\" instead of to stderr. The log file will be placed in the directory \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\"." ) +CLI_CONTROL( bool, LogToDebugger, false, "If set to a nonzero value, sends log information to the debugger instead of to stderr. If both LogToFile and LogToDebugger are nonzero then log information will be sent both to a file and to the debugger." ) +CLI_CONTROL( int, LogIndent, 0, "Indents each log entry by this many spaces." ) +CLI_CONTROL( bool, BuildLogging, false, "If set to a nonzero value, logs the program build log after each call to clBuildProgram(). This will likely only function correctly for synchronous builds. Note that the build log is logged regardless of whether the program built successfully, which allows compiler warnings to be logged for successful compiles." ) +CLI_CONTROL( bool, PreferredWorkGroupSizeMultipleLogging, false, "If set to a nonzero value, logs the preferred work group size multiple for each kernel after each call to clCreateKernel(). On some devices this is the equivalent of the SIMD size for this kernel." ) +CLI_CONTROL( bool, CallLogging, false, "If set to a nonzero value, logs function entry and exit information for every OpenCL call. This can be used to easily determine which OpenCL call is causing an application to crash or fail or if a crash occurs outside of an OpenCL call. This setting is best used with LogToFile or LogToDebugger as it can generate a lot of log data." ) +CLI_CONTROL( bool, CallLoggingEnqueueCounter, false, "If set to a nonzero value, logs the enqueue counter in addition to function entry and exit information for every OpenCL call. This can be used to determine appropriate limits for DumpBuffersMinEnqueue, DumpBuffersMaxEnqueue, DumpImagesMinEnqueue, or DumpImagesMaxEnqueue. If CallLogging is disabled then this control will have no effect." 
) +CLI_CONTROL( bool, CallLoggingThreadId, false, "If set to a nonzero value, logs the ID of the calling thread in addition to function entry and exit information for every OpenCL call. This can be helpful when debugging multi-threading issues." ) +CLI_CONTROL( bool, CallLoggingThreadNumber, false, "If set to a nonzero value, logs the symbolic number of the calling thread in addition to function entry and exit information for every OpenCL call. This can be helpful when debugging multi-threading issues." ) +CLI_CONTROL( bool, CallLoggingElapsedTime, false, "If set to a nonzero value, logs the elapsed time in microseconds in addition to function entry and exit information for every OpenCL call, starting from the time the intercept DLL is loaded." ) +CLI_CONTROL( bool, ITTCallLogging, false, "If set to a nonzero value, logs function entry and exit information for every OpenCL call using the ITT APIs. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support." ) +CLI_CONTROL( bool, ChromeCallLogging, false, "If set to a nonzero value, logs function entry and exit information for every OpenCL call to a JSON file that may be used for Chrome Tracing." ) +CLI_CONTROL( bool, ErrorLogging, false, "If set to a nonzero value, logs all OpenCL errors and the function name that caused the error." ) +CLI_CONTROL( bool, ErrorAssert, false, "If set to a nonzero value, breaks into the debugger when an OpenCL error occurs." ) +CLI_CONTROL( bool, ContextCallbackLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will install a callback for every context and log any calls to the context callback. The application's context callback, if any, will be invoked after the Intercept Layer for OpenCL Applications' context callback." 
) +CLI_CONTROL( cl_uint, ContextHintLevel, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will attempt to create contexts with the CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL property set to the specified value. If this property is specified by the application, the Intercept Layer for OpenCL Applications will overwrite it with the specified value, otherwise the property and the specified value will be added to the list of context creation properties. This functionality is only available for OpenCL implementations that support the cl_intel_driver_diagnostics extension. If this functionality is not available in the underlying OpenCL implementation, the unmodified list of context properties will be used to create the context instead. More information about this feature, including valid values and their meaning, can be found in the cl_intel_driver_diagnostics extension specification." ) +CLI_CONTROL( bool, EventCallbackLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will install its own callback for every event callback and log the call to the event callback. The application's event callback will be invoked after the Intercept Layer for OpenCL Applications' event callback." ) +CLI_CONTROL( bool, CLInfoLogging, false, "If set to a nonzero value, logs information about the platforms and devices in the system on the first call to clGetPlatformIDs()." ) +CLI_CONTROL( std::string, LogDir, "", "If set, the Intercept Layer for OpenCL Applications will emit logs to this directory instead of the default log directory." ) + +CLI_CONTROL_SEPARATOR( Performance Timing Controls: ) +CLI_CONTROL( bool, HostPerformanceTiming, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will track the minimum, maximum, and average host CPU time for each OpenCL entry point. 
When the process exits, this information will be printed to the file \"clIntercept_report.txt\" in the directory \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\"." ) +CLI_CONTROL( bool, DevicePerformanceTiming, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will add event profiling to track the minimum, maximum, and average device time for each OpenCL command. This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. When the process exits, this information will be printed to the file \"clIntercept_report.txt\" in the directory \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\"." ) +CLI_CONTROL( bool, DevicePerformanceTimeHashTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels from programs with different hashes for the purpose of device performance timing." ) +CLI_CONTROL( bool, DevicePerformanceTimeKernelInfoTracking,false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels using information such as the kernel's Preferred Work Group Size Multiple (AKA SIMD size)." ) +CLI_CONTROL( bool, DevicePerformanceTimeGWSTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different global work sizes for the purpose of device performance timing." ) +CLI_CONTROL( bool, DevicePerformanceTimeLWSTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different local work sizes for the purpose of device performance timing." 
) +CLI_CONTROL( bool, DevicePerformanceTimingSkipUnmap, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will skip device performance timing for unmap operations. This is a workaround for a bug in some OpenCL implementations, where querying events created from unmap operations results in driver crashes." ) +CLI_CONTROL( bool, HostPerformanceTimeLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the host elapsed time for each OpenCL entry point. This can be useful to identify OpenCL entry points that execute significantly slower or faster than average on the host." ) +CLI_CONTROL( bool, DevicePerformanceTimeLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the device execution time deltas for each OpenCL command. This can be useful to identify specific OpenCL commands that execute significantly slower or faster than average on the device. If DevicePerformanceTiming is disabled then this control will have no effect." ) +CLI_CONTROL( bool, DevicePerformanceTimelineLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the device execution times for each OpenCL command. This can be useful to visualize the execution timeline of OpenCL commands that execute on the device. If DevicePerformanceTiming is disabled then this control will have no effect." ) +CLI_CONTROL( std::string, DevicePerfCounterCustom, "", "If set, the Intercept Layer for OpenCL Applications will collect MDAPI metrics for the Metric Set corresponding to this value for each OpenCL command. Frequently used Metric Sets include: ComputeBasic, ComputeExtended, L3_1, Sampler. The output file has the potential to be very big depending on the work load. 
This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. When the process exits, this information will be printed to the file \"clintercept_perfcounter_dump_.txt\" in the directory \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\". This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) +CLI_CONTROL( std::string, DevicePerfCounterFile, "", "Full path to a custom MDAPI file. This can be used to add custom Metric Sets." ) +CLI_CONTROL( bool, DevicePerfCounterTiming, false, "If set to a nonzero value and DevicePerfCounterCustom is set, the Intercept Layer for OpenCL Applications will enable Intel GPU Performance Counters to track the minimum, maximum, and average performance counter deltas for each OpenCL command. This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. When the process exits, this information will be printed to the file \"clIntercept_report.txt\" in the directory \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\". This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support." ) +CLI_CONTROL( bool, ITTPerformanceTiming, false, "[Note: This control makes ITT calls, but they appear to do nothing!] If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate ITT-compatible performance timing data. Similar to DevicePerformanceTiming, this operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING_ENABLED and may increment the reference count for application events. 
ITTPerformanceTiming will also silently create OpenCL command queues that support advanced performance counters if this functionality is available. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support." ) +CLI_CONTROL( bool, ITTShowOnlyExecutingEvents, false, "[Note: This control makes ITT calls, but they appear to do nothing!] By default, when ITTPerformanceTiming is enabled, the Intercept Layer for OpenCL Applications will generate ITT-compatible information for all states of an OpenCL event: when the command was queued, when it was submitted, when it started executing, and when it finished executing. If ITTShowOnlyExecutingEvents is set to a nonzero value, the Intercept Layer for OpenCL Applications will only generate ITT-compatible instrumentation when an event begins executing and when an event ends executing. Since no information will be displayed about when a command is queued or submitted, this can sometimes make it easier to identify times when the device is idle. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support." ) +CLI_CONTROL( bool, ChromePerformanceTiming, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate device performance timing information in a JSON file that may be used for Chrome Tracing." ) + +CLI_CONTROL_SEPARATOR( Controls for Dumping and Injecting Programs and Build Options: ) +CLI_CONTROL( bool, OmitProgramNumber, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the program number from dumped file names and hash tracking. This can produce deterministic results even if programs are built in a non-deterministic order (say, by multiple threads)." 
) +CLI_CONTROL( bool, SimpleDumpProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the last string(s) passed to clCreateProgramWithSource() to the file kernel.cl, and the last program options passed to clBuildProgram() to the file kernel.txt. These files will be dumped to the application's working directory. If an application fails to compile a program and exits the program immediately after detecting a compile failure, SimpleDumpProgramSource may be all that is needed to identify the program and program options that are failing to compile." ) +CLI_CONTROL( bool, DumpProgramSourceScript, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every string passed to clCreateProgramWithSource() to its own file. The directory names and file names for the dumped files match the directory names and file names expected by a modified OpenCL conformance test script to capture kernels. This setting overrides SimpleDumpProgramSource, and if it is set to a nonzero value then the value of SimpleDumpProgramSource is ignored." ) +CLI_CONTROL( bool, DumpProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every string passed to clCreateProgramWithSource() to its own file. The files will be dumped to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\". The filename will have the form \"CLI___source.cl\". Program options that are passed to clBuildProgram() or clCompileProgram() will be dumped to the same directory with the filename \"CLI____options.txt\". This setting can be used for information purposes to see all kernels that are used by an application or to dump programs for program injection. This setting overrides DumpProgramSourceScript and SimpleDumpProgramSource, and if it is set to a nonzero value then the values of DumpProgramSourceScript and SimpleDumpProgramSource will be ignored." 
) +CLI_CONTROL( bool, DumpInputProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program binary that is passed to clCreateProgramWithBinary() to its own file. The files will be dumped to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\". The filename will have the form \"CLI___.bin\". This is the input program binary provided by the application, and not a device binary queried from the OpenCL implementation. In particular, note that it may be a SPIR 1.2 binary." ) +CLI_CONTROL( bool, DumpProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program binary that was successfully built with clBuildProgram() to its own file. The files will be dumped to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\". The filename will have the form \"CLI____.bin\". Program options that are passed to clBuildProgram() or clCompileProgram() will be dumped to the same directory with the filename \"CLI____options.txt\". This setting can be used to examine compiled program binaries or to dump program binaries for program binary injection. Note that this option dumps the output binary, which is a device binary, after calling clBuildProgram()." ) +CLI_CONTROL( bool, DumpProgramSPIRV, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program IL binary passed to clCreateProgramWithIL() to its own file. The files will be dumped to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\". The filename will have the form \"CLI___0000.spv\" - for now at least!. Program options that are passed to clBuildProgram() or clCompileProgram() will be dumped to the same directory with the filename \"CLI____options.txt\". This setting can be used for information purposes to see all kernels that are used by an application or to dump SPIRV programs for SPIRV injection." 
) +CLI_CONTROL( bool, InjectProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel source to clCreateProgramWithSource() and/or potentially modified options to clBuildProgram()." ) +CLI_CONTROL( bool, InjectProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel binaries via clCreateProgramWithBinary() in place of program text for each call to clCreateProgramWithSource(). This is typically done to reduce program compilation time or to use known good program binaries." ) +CLI_CONTROL( bool, RejectProgramBinaries, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will reject kernel binaries passed via clCreateProgramWithBinary() and return CL_INVALID_BINARY. This can be used to force an application to re-compile program binaries from source." ) +CLI_CONTROL( bool, InjectProgramSPIRV, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel SPIR-V binaries via clCreateProgramWithIL() in place of program text for each call to clCreateProgramWithSource()." ) +CLI_CONTROL( bool, PrependProgramSource, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to prepend kernel code from a file to the application provided kernel source passed to clCreateProgramWithSource(). The Intercept Layer for OpenCL Applications will look for kernel source to prepend in the directory \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\". The files that are searched for are (in order) \"CLI___prepend.cl\", \"CLI__prepend.cl\", and \"CLI_prepend.cl\"." ) +CLI_CONTROL( std::string, AppendBuildOptions, "", "If set, the Intercept Layer for OpenCL Applications will add these build options to the end of any application provided or injected build options for each call to clBuildProgram()." 
) +CLI_CONTROL( bool, DumpProgramBuildLogs, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump build logs for every device a program is built for to a separate file. The files will be dumped to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\". The filename will have the form \"CLI_____build_log.txt\"." ) + +CLI_CONTROL_SEPARATOR( Controls for Automatically Creating SPIR-V Modules: ) +CLI_CONTROL( bool, AutoCreateSPIRV, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically create SPIR-V modules by invoking CLANG each time a program is built. The files will be dumped to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\". The filename will have the form \"CLI___.spv\". Because invoking CLANG requires a file containing the OpenCL C source, setting this option implicitly sets DumpProgramSource as well. Additionally, this feature is not available for injected program source." ) +CLI_CONTROL( std::string, SPIRVClang, "clang", "The clang executable used to compile an OpenCL C program to a SPIR-V module. This can be an executable in the system path, a relative path, or a full absolute path." ) +CLI_CONTROL( std::string, SPIRVCLHeader, "opencl.h", "The OpenCL header file used to compile an OpenCL C program to a SPIR-V module. This must be a relative path or a full absolute path." ) +CLI_CONTROL( std::string, SPIRVDis, "spirv-dis", "The spirv-dis executable used to optionally disassemble the compiled SPIR-V module to a SPIR-V text representation. This can be an executable in the system path, a relative path, or a full absolute path." ) +CLI_CONTROL( std::string, DefaultOptions, "-cc1 -x cl -cl-std=CL1.2 -D__OPENCL_C_VERSION__=120 -D__OPENCL_VERSION__=120 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build a non-OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." 
) +CLI_CONTROL( std::string, OpenCL2Options, "-cc1 -x cl -cl-std=CL2.0 -D__OPENCL_C_VERSION__=200 -D__OPENCL_VERSION__=200 -emit-spirv -triple=spir", "This is the list of options that is implicitly passed to CLANG to build an OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options." ) + +CLI_CONTROL_SEPARATOR( Controls for Dumping Buffers and Images: ) +CLI_CONTROL( bool, DumpArgumentsOnSet, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the argument value on calls to clSetKernelArg(). Arguments are dumped as raw binary data to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\\SetKernelArg\". The filenames will have the form \"SetKernelArg__Kernel__Arg_.bin\"." ) +CLI_CONTROL( bool, DumpBuffersAfterCreate, false, "If set, the Intercept Layer for OpenCL Applications will dump buffers to a file after creation. This control still honors the enqueue counter limits, even though no enqueues are involved during buffer creation. Currently only works for cl_mem buffers created from host pointers." ) +CLI_CONTROL( bool, DumpBuffersAfterMap, false, "If set, the Intercept Layer for OpenCL Applications will dump the contents of a buffer to a file after the buffer is mapped. Only valid if the buffer is NOT mapped with CL_MAP_WRITE_INVALIDATE_REGION. If the buffer was mapped non-blocking, this may insert a clFinish() into the command queue, which may have functional or performance implications." ) +CLI_CONTROL( bool, DumpBuffersBeforeUnmap, false, "If set, the Intercept Layer for OpenCL Applications will dump the contents of a buffer to a file immediately before the buffer is unmapped. This is done by inserting a blocking clEnqueueMapBuffer() (and matching clEnqueueUnmapMemObject()) into the command queue, which may have functional or performance implications." 
) +CLI_CONTROL( bool, DumpBuffersBeforeEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump buffers before calls to clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the kernel being enqueued are dumped. Buffers are dumped as raw binary data to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\\memDumpPreEnqueue\". The filenames will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\"." ) +CLI_CONTROL( bool, DumpBuffersAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump buffers after calls to clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the kernel being enqueued are dumped. Buffers are dumped as raw binary data to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\\memDumpPostEnqueue\". The filenames will have the form \"Enqueue__Kernel__Arg__Buffer_.bin\". Note that this is the same naming convention as with DumpBuffersBeforeEnqueue, so the changes resulting from an enqueue can be determined by diff'ing the preEnqueue folder with the postEnqueue folder." ) +CLI_CONTROL( std::string, DumpBuffersForKernel, "", "If set, the Intercept Layer for OpenCL Applications will only dump buffers when the specified kernel is enqueued. This control is ignored unless DumpBuffersBeforeEnqueue or DumpBuffersAfterEnqueue are enabled." ) +CLI_CONTROL( bool, DumpImagesBeforeEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump images before calls to clEnqueueNDRangeKernel(). Only images that are kernel arguments for the kernel being enqueued are dumped. Images are dumped as raw binary data to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\\memDumpPreEnqueue\". The filenames will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\"." ) +CLI_CONTROL( bool, DumpImagesAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump images after calls to clEnqueueNDRangeKernel(). 
Only images that are kernel arguments for the kernel being enqueued are dumped. Images are dumped as raw binary data to \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\\\memDumpPostEnqueue\". The filenames will have the form \"Enqueue__Kernel__Arg__Image__xx_bpp.raw\". Note that this is the same naming convention as with DumpImagesBeforeEnqueue, so the changes resulting from an enqueue can be determined by diff'ing the preEnqueue folder with the postEnqueue folder." ) +CLI_CONTROL( std::string, DumpImagesForKernel, "", "If set, the Intercept Layer for OpenCL Applications will only dump images when the specified kernel is enqueued. This control is ignored unless DumpImagesBeforeEnqueue or DumpImagesAfterEnqueue are enabled." ) +CLI_CONTROL( cl_uint, DumpBuffersMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only dump buffers when the enqueue counter is greater than this value, inclusive." ) +CLI_CONTROL( cl_uint, DumpBuffersMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only dump buffers when the enqueue counter is less than this value, inclusive." ) +CLI_CONTROL( cl_uint, DumpImagesMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only dump images when the enqueue counter is greater than this value, inclusive." ) +CLI_CONTROL( cl_uint, DumpImagesMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will only dump images when the enqueue counter is less than this value, inclusive." ) + +CLI_CONTROL_SEPARATOR( AubCapture Controls: ) +CLI_CONTROL( bool, AubCapture, false, "This is the master control for aub capture. The Intercept Layer for OpenCL Applications doesn't implement aub capture itself, but can be used to selectively enable and disable aub capture via kdc.exe." 
) +CLI_CONTROL( bool, AubCaptureIndividualEnqueues, false, "If set, the Intercept Layer for OpenCL Applications will invoke kdc.exe to start aub capture before a kernel enqueue, and will also invoke kdc.exe to stop aub capture immediately after the kernel enqueue. Each .daf file will have the form \"AubCapture_Enqueue__kernel_.daf\". Note that non-kernel enqueues such as calls to clEnqueueReadBuffer() and clEnqueueWriteBuffer() will NOT be aub captured when this control is set. The AubCaptureMinEnqueue and AubCaptureMaxEnqueue controls are still honored when AubCaptureIndividualEnqueues is set." ) +CLI_CONTROL( cl_uint, AubCaptureMinEnqueue, 0, "The Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture when the enqueue counter is greater than this value, inclusive." ) +CLI_CONTROL( cl_uint, AubCaptureMaxEnqueue, UINT_MAX, "The Intercept Layer for OpenCL Applications will invoke kdc.exe to stop aub capture when the enqueue counter is greater than this value, meaning that only enqueues less than this value, inclusive, will be captured. If the enqueue counter never reaches this value, the Intercept Layer for OpenCL Applications will stop aub capture when the DLL is unloaded." ) +CLI_CONTROL( std::string, AubCaptureKernelName, "", "If set, the Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture when the kernel name equals this name.") +CLI_CONTROL( std::string, AubCaptureKernelGWS, "", "If set, the Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture when the NDRange global work size matches this string. The string should have the form \"XxYxZ\". The wildcard \"*\" matches all global work sizes.") +CLI_CONTROL( std::string, AubCaptureKernelLWS, "", "If set, the Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture when the NDRange local work size matches this string. The string should have the form \"XxYxZ\". 
The wildcard \"*\" matches all local work sizes, and the string \"NULL\" matches a NULL local work size.") +CLI_CONTROL( bool, AubCaptureUniqueKernels, false, "If set, the Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture if the kernel signature (i.e. hash + kernelname + gws + lws) has not been seen already. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues." ) +CLI_CONTROL( cl_uint, AubCaptureNumKernelEnqueuesSkip, 0, "The Intercept Layer for OpenCL Applications will skip this many kernel enqueues before invoking kdc.exe to enable aub capture. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues.") +CLI_CONTROL( cl_uint, AubCaptureNumKernelEnqueuesCapture, UINT_MAX, "The Intercept Layer for OpenCL Applications will only capture this many kernel enqueues. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues.") +CLI_CONTROL( cl_uint, AubCaptureStartWait, 0, "The Intercept Layer for OpenCL Applications will wait for this many milliseconds before invoking kdc.exe to begin aub capture.") +CLI_CONTROL( cl_uint, AubCaptureEndWait, 0, "The Intercept Layer for OpenCL Applications will wait for this many milliseconds before invoking kdc.exe to end aub capture.") + +CLI_CONTROL_SEPARATOR( Execution Controls: ) +CLI_CONTROL( bool, NoErrors, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will cause all OpenCL APIs to return a successful error status." ) +CLI_CONTROL( bool, FinishAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFinish() after every enqueue. 
The command queue that the command was just enqueued to is passed to clFinish(). This can be used to debug possible timing or resource management issues and will likely impact performance." ) +CLI_CONTROL( bool, FlushAfterEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFlush() after every enqueue. The command queue that the command was just enqueued to is passed to clFlush(). This can also be used to debug possible timing or resource management issues and is slightly less obtrusive than FinishAfterEnqueue but still will likely impact performance. If both FinishAfterEnqueue and FlushAfterEnqueue are nonzero then the Intercept Layer for OpenCL Applications will only insert a call to clFinish() after every enqueue, because clFinish() implies clFlush()." ) +CLI_CONTROL( bool, FlushAfterEnqueueBarrier, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFlush() after every barrier enqueue. The command queue that the command was just enqueued to is passed to clFlush(). This has been useful to debug out-of-order queue issues." ) +CLI_CONTROL( bool, InOrderQueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force all queues to be created in-order. This can be used for performance analysis, but may lead to deadlocks in some cases." ) +CLI_CONTROL( bool, NullEnqueue, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will silently ignore any enqueue. This can be used for performance analysis, but will likely cause errors if the application relies on any sort of information from OpenCL events and should be used carefully." ) +CLI_CONTROL( bool, NullLocalWorkSize, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will force the local work size argument to clEnqueueNDRangeKernel() to be NULL, which causes the OpenCL implementation to pick the local work size. 
Note that this control takes effect before NullLocalWorkSizeX / NullLocalWorkSizeY / NullLocalWorkSizeZ (see below), so enabling both controls will have the effect of forcing a specific local work size." ) +CLI_CONTROL( size_t, NullLocalWorkSizeX, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect." ) +CLI_CONTROL( size_t, NullLocalWorkSizeY, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect." ) +CLI_CONTROL( size_t, NullLocalWorkSizeZ, 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. 
If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect." ) +CLI_CONTROL( bool, InitializeBuffers, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will initialize the contents of allocated buffers with zero. Only valid for non-COPY_HOST_PTR and non-USE_HOST_PTR allocations." ) + +CLI_CONTROL_SEPARATOR( Platform and Device Query Overrides: ) +CLI_CONTROL( std::string, PlatformName, "", "If set to a non-empty value, the clGetPlatformInfo() query for CL_PLATFORM_NAME will return this string instead of the true platform name." ) +CLI_CONTROL( std::string, PlatformVendor, "", "If set to a non-empty value, the clGetPlatformInfo() query for CL_PLATFORM_VENDOR will return this string instead of the true platform vendor." ) +CLI_CONTROL( std::string, PlatformProfile, "", "If set to a non-empty value, the clGetPlatformInfo() query for CL_PLATFORM_PROFILE will return this string instead of the true platform profile." ) +CLI_CONTROL( std::string, PlatformVersion, "", "If set to a non-empty string, the clGetPlatformInfo() query for CL_PLATFORM_VERSION will return this string instead of the true platform version." ) +CLI_CONTROL( cl_uint, DeviceTypeFilter, CL_DEVICE_TYPE_ALL, "Hides all device types that are not in the filter. Note: CL_DEVICE_TYPE_CPU = 2, CL_DEVICE_TYPE_GPU = 4, CL_DEVICE_TYPE_ACCELERATOR = 8, CL_DEVICE_TYPE_CUSTOM = 16." ) +CLI_CONTROL( cl_uint, DeviceType, 0, "If set to a non-zero value, the clGetDeviceInfo() query for CL_DEVICE_TYPE will return this value instead of the true device type. In addition, calls to clGetDeviceIDs() for this device type will return all devices, not just devices of the requested type. This can be used to enumerate all devices (even CPUs) as GPUs, or vice versa." 
) +CLI_CONTROL( std::string, DeviceName, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_NAME will return this value instead of the true device name." ) +CLI_CONTROL( std::string, DeviceVendor, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_VENDOR will return this value instead of the true device vendor." ) +CLI_CONTROL( std::string, DeviceProfile, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_PROFILE will return this value instead of the true device profile." ) +CLI_CONTROL( std::string, DeviceVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_VERSION will return this value instead of the true device version." ) +CLI_CONTROL( std::string, DeviceCVersion, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_OPENCL_C_VERSION will return this value instead of the true device OpenCL C version." ) +CLI_CONTROL( std::string, DeviceExtensions, "", "If set to a non-empty string, the clGetDeviceInfo() query for CL_DEVICE_EXTENSIONS will return this value instead of the true device extensions string." ) +CLI_CONTROL( cl_uint, DeviceVendorID, 0, "If set to a non-zero value, the clGetDeviceInfo() query for CL_DEVICE_VENDOR_ID will return this value instead of the true device vendor ID." ) +CLI_CONTROL( cl_uint, DeviceMaxComputeUnits, 0, "If set to a non-zero value, the clGetDeviceInfo() query for CL_DEVICE_MAX_COMPUTE_UNITS will return this value instead of the true device max compute units." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthChar, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR will return this value instead of the true device preferred vector width." 
) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthShort, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthInt, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthLong, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthHalf, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthFloat, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT will return this value instead of the true device preferred vector width." ) +CLI_CONTROL( cl_uint, DevicePreferredVectorWidthDouble, UINT_MAX, "If set to a non-negative value, the clGetDeviceInfo() query for CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE will return this value instead of the true device preferred vector width." ) + +CLI_CONTROL_SEPARATOR( Precompiled Kernel and Builtin Kernel Override Controls: ) +CLI_CONTROL( bool, ForceByteBufferOverrides, false, "If set to a nonzero value, each of the buffer functions that are overridden (via one or more of the keys below) will use a byte-wise operation to read/write/copy the buffer (default behavior is to try to copy multiple bytes at a time, if possible). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." 
) +CLI_CONTROL( bool, OverrideReadBuffer, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueReadBuffer() instead of the implementation's clEnqueueReadBuffer(). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) +CLI_CONTROL( bool, OverrideWriteBuffer, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueWriteBuffer() instead of the implementation's clEnqueueWriteBuffer(). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) +CLI_CONTROL( bool, OverrideCopyBuffer, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueCopyBuffer() instead of the implementation's clEnqueueCopyBuffer(). Note: Requires OpenCL 1.1 or the \"byte addressable store\" extension." ) +CLI_CONTROL( bool, OverrideReadImage, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueReadImage() instead of the implementation's clEnqueueReadImage(). Only 2D images are currently supported." ) +CLI_CONTROL( bool, OverrideWriteImage, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueWriteImage() instead of the implementation's clEnqueueWriteImage(). Only 2D images are currently supported." ) +CLI_CONTROL( bool, OverrideCopyImage, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueCopyImage() instead of the implementation's clEnqueueCopyImage(). Only 2D images are currently supported." ) +CLI_CONTROL( bool, OverrideBuiltinKernels, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will use its own version of the built-in OpenCL kernels that may be accessed via clCreateProgramWithBuiltInKernels(). 
At present, only the VME block_motion_estimate_intel kernel is implemented." ) + +CLI_CONTROL_SEPARATOR( SIMD Survey Controls: ) +CLI_CONTROL( bool, SIMDSurvey, false, "Executes a SIMD survey state machine. The general idea of the SIMD survey state machine is to create and manage three additional kernels for each actual OpenCL kernel, one for each SIMD size. Then, execute and time the three kernels, and choose the fastest for subsequent executions." ) +CLI_CONTROL( cl_uint, SIMDSurveyWarmupIterations, 4, "This is the number of NDRanges that the SIMD survey state machine ignores before starting to time the SIMD survey." ) +CLI_CONTROL( std::string, SIMDSurveySIMD8Option, "-DSIMD_SURVEY=8 ", "This is the build option that is pre-pended to the application-specified build options to create the SIMD8 kernel." ) +CLI_CONTROL( std::string, SIMDSurveySIMD16Option, "-DSIMD_SURVEY=16", "This is the build option that is pre-pended to the application-specified build options to create the SIMD16 kernel." ) +CLI_CONTROL( std::string, SIMDSurveySIMD32Option, "-DSIMD_SURVEY=32", "This is the build option that is pre-pended to the application-specified build options to create the SIMD32 kernel." 
) +CLI_CONTROL( bool, SIMDOracle, false, "[Note: Not currently implemented, but the idea behind the SIMD oracle is to save the best SIMD size from run-to-run, so the full SIMD survey does not need to be re-executed.]" ) diff --git a/Src/dispatch.cpp b/Src/dispatch.cpp new file mode 100644 index 00000000..81999575 --- /dev/null +++ b/Src/dispatch.cpp @@ -0,0 +1,8612 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#include + +#include "intercept.h" + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetPlatformIDs)( + cl_uint num_entries, + cl_platform_id* platforms, + cl_uint* num_platforms ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + LOG_CLINFO(); + + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetPlatformIDs( + num_entries, + platforms, + num_platforms ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetPlatformIDs( + num_entries, + platforms, + num_platforms ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetPlatformInfo)( + cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + std::string platformInfo; + if( pIntercept->callLogging() ) + { + pIntercept->getPlatformInfoString( + platform, + platformInfo ); + } + CALL_LOGGING_ENTER( "platform = [ %s ], param_name = %s (%08X)", + platformInfo.c_str(), + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = CL_SUCCESS; + + if( pIntercept->overrideGetPlatformInfo( + param_name, + param_value_size, + param_value, + param_value_size_ret, + retVal ) == false ) + { + retVal = pIntercept->dispatch().clGetPlatformInfo( + platform, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetPlatformInfo( + platform, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + 
+/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetDeviceIDs)( + cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + std::string platformInfo; + if( pIntercept->callLogging() ) + { + pIntercept->getPlatformInfoString( + platform, + platformInfo ); + } + CALL_LOGGING_ENTER( "platform = [ %s ], device_type = %s (%llX)", + platformInfo.c_str(), + pIntercept->enumName().name_device_type( device_type ).c_str(), + device_type ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = CL_SUCCESS; + + device_type = pIntercept->filterDeviceType( device_type ); + + retVal = pIntercept->dispatch().clGetDeviceIDs( + platform, + device_type, + num_entries, + devices, + num_devices ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetDeviceIDs( + platform, + device_type, + num_entries, + devices, + num_devices ); + } +} + +/////////////////////////////////////////////////////////////////////////////// + +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetDeviceInfo)( + cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + if (pIntercept) + { + std::string deviceInfo; + if( pIntercept->callLogging() ) + { + pIntercept->getDeviceInfoString( + 1, + &device, + deviceInfo ); + } + CALL_LOGGING_ENTER( "device = [ %s ], param_name = %s (%08X)", + deviceInfo.c_str(), + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = CL_SUCCESS; + + if( pIntercept->overrideGetDeviceInfo( + device, + param_name, + param_value_size, + param_value, + param_value_size_ret, + retVal ) == false ) + { + retVal 
= pIntercept->dispatch().clGetDeviceInfo( + device, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetDeviceInfo( + device, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clCreateSubDevices)( + cl_device_id in_device, + const cl_device_partition_property* properties, + cl_uint num_devices, + cl_device_id* out_devices, + cl_uint* num_devices_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clCreateSubDevices( + in_device, + properties, + num_devices, + out_devices, + num_devices_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clCreateSubDevices( + in_device, + properties, + num_devices, + out_devices, + num_devices_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clRetainDevice)( + cl_device_id device ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetDeviceInfo( + device, + CL_DEVICE_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] device = %p", + ref_count, + device ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clRetainDevice( + device ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() ) + { + ref_count = 0; + 
pIntercept->dispatch().clGetDeviceInfo( + device, + CL_DEVICE_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clRetainDevice( + device ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clReleaseDevice)( + cl_device_id device ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetDeviceInfo( + device, + CL_DEVICE_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] device = %p", + ref_count, + device ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clReleaseDevice( + device ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() && ref_count != 0 ) + { + // This isn't strictly correct, but it's pretty close, and it + // avoids crashes in some cases for bad implementations. 
+ --ref_count; + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clReleaseDevice( + device ); + } +} + +#ifdef __ANDROID__ +//Workaround for Android, shared library destructor isn't called +static int contextCount = 0; +static std::mutex mContextCount; +#endif + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_context CL_API_CALL CLIRN(clCreateContext)( + const cl_context_properties* properties, + cl_uint num_devices, + const cl_device_id* devices, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void* user_data, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_context_properties* newProperties = NULL; + cl_context retVal = NULL; + + std::string contextProperties; + std::string deviceInfo; + if( pIntercept->callLogging() ) + { + pIntercept->getContextPropertiesString( + properties, + contextProperties ); + pIntercept->getDeviceInfoString( + num_devices, + devices, + deviceInfo ); + } + CALL_LOGGING_ENTER( "properties = [ %s ], num_devices = %d, devices = [ %s ]", + contextProperties.c_str(), + num_devices, + deviceInfo.c_str() ); + CREATE_CONTEXT_OVERRIDE_INIT( properties, pfn_notify, user_data, newProperties ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + if( ( retVal == NULL ) && newProperties ) + { + retVal = pIntercept->dispatch().clCreateContext( + newProperties, + num_devices, + devices, + pfn_notify, + user_data, + errcode_ret ); + } + if( retVal == NULL ) + { + retVal = pIntercept->dispatch().clCreateContext( + properties, + num_devices, + devices, + pfn_notify, + user_data, + errcode_ret ); + } + + ITT_ADD_PARAM_AS_METADATA( retVal ); + + INIT_PRECOMPILED_KERNEL_OVERRIDES( retVal ); + INIT_BUILTIN_KERNEL_OVERRIDES( retVal ); + + CPU_PERFORMANCE_TIMING_END(); + CREATE_CONTEXT_OVERRIDE_CLEANUP( retVal, newProperties ); + 
CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + +#ifdef __ANDROID__ + mContextCount.lock(); + contextCount ++; + mContextCount.unlock(); +#endif + return retVal; + } + else + { + return dummyDispatch.clCreateContext( + properties, + num_devices, + devices, + pfn_notify, + user_data, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_context CL_API_CALL CLIRN(clCreateContextFromType)( + const cl_context_properties* properties, + cl_device_type device_type, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void* user_data, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_context_properties* newProperties = NULL; + cl_context retVal = NULL; + + std::string contextProperties; + if( pIntercept->callLogging() ) + { + pIntercept->getContextPropertiesString( + properties, + contextProperties ); + } + CALL_LOGGING_ENTER( "properties = [ %s ], device_type = %s (%llX)", + contextProperties.c_str(), + pIntercept->enumName().name_device_type( device_type ).c_str(), + device_type ); + CREATE_CONTEXT_OVERRIDE_INIT( properties, pfn_notify, user_data, newProperties ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + device_type = pIntercept->filterDeviceType( device_type ); + + if( ( retVal == NULL ) && newProperties ) + { + retVal = pIntercept->dispatch().clCreateContextFromType( + newProperties, + device_type, + pfn_notify, + user_data, + errcode_ret ); + } + if( retVal == NULL ) + { + retVal = pIntercept->dispatch().clCreateContextFromType( + properties, + device_type, + pfn_notify, + user_data, + errcode_ret ); + } + + ITT_ADD_PARAM_AS_METADATA( retVal ); + + INIT_PRECOMPILED_KERNEL_OVERRIDES( retVal ); + INIT_BUILTIN_KERNEL_OVERRIDES( retVal ); + + CPU_PERFORMANCE_TIMING_END(); + CREATE_CONTEXT_OVERRIDE_CLEANUP( retVal, newProperties ); + CHECK_ERROR( errcode_ret[0] 
); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clCreateContextFromType( + properties, + device_type, + pfn_notify, + user_data, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clRetainContext)( + cl_context context ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetContextInfo( + context, + CL_CONTEXT_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] context = %p", + ref_count, + context ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clRetainContext( + context ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetContextInfo( + context, + CL_CONTEXT_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clRetainContext( + context ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clReleaseContext)( + cl_context context ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetContextInfo( + context, + CL_CONTEXT_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] context = %p", + ref_count, + context ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clReleaseContext( + context ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() && 
ref_count != 0 ) + { + // This isn't strictly correct, but it's pretty close, and it + // avoids crashes in some cases for bad implementations. + --ref_count; + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + +#if 0 + pIntercept->report(); +#endif + +#ifdef __ANDROID__ + mContextCount.lock(); + contextCount --; + mContextCount.unlock(); + + if( contextCount == 0 ) + { + pIntercept->report(); + } +#endif + return retVal; + } + else + { + return dummyDispatch.clReleaseContext( + context ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetContextInfo)( + cl_context context, + cl_context_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "param_name = %s (%08X)", + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetContextInfo( + context, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetContextInfo( + context, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_command_queue CL_API_CALL CLIRN(clCreateCommandQueue)( + cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + std::string deviceInfo; + if( pIntercept->callLogging() ) + { + pIntercept->getDeviceInfoString( + 1, + &device, + deviceInfo ); + } + CALL_LOGGING_ENTER( "device = [ %s ], properties = %s (%llX)", + deviceInfo.c_str(), + 
pIntercept->enumName().name_command_queue_properties( properties ).c_str(), + properties ); + + pIntercept->modifyCommandQueueProperties( properties ); + + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_command_queue retVal = NULL; + +#if defined(USE_MDAPI) + if( !pIntercept->config().DevicePerfCounterCustom.empty() ) + { + retVal = pIntercept->createMDAPICommandQueue( + context, + device, + properties, + errcode_ret ); + } +#endif + + if( retVal == NULL ) + { + retVal = pIntercept->dispatch().clCreateCommandQueue( + context, + device, + properties, + errcode_ret ); + } + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + ITT_REGISTER_COMMAND_QUEUE( retVal, false ); + CHROME_REGISTER_COMMAND_QUEUE( retVal ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clCreateCommandQueue( + context, + device, + properties, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clRetainCommandQueue)( + cl_command_queue command_queue ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetCommandQueueInfo( + command_queue, + CL_QUEUE_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] command_queue = %p", + ref_count, + command_queue ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clRetainCommandQueue( + command_queue ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetCommandQueueInfo( + command_queue, + CL_QUEUE_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return 
dummyDispatch.clRetainCommandQueue( + command_queue ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clReleaseCommandQueue)( + cl_command_queue command_queue ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetCommandQueueInfo( + command_queue, + CL_QUEUE_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] command_queue = %p", + ref_count, + command_queue ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clReleaseCommandQueue( + command_queue ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + ITT_RELEASE_COMMAND_QUEUE( command_queue ); + if( pIntercept->callLogging() && ref_count != 0 ) + { + // This isn't strictly correct, but it's pretty close, and it + // avoids crashes in some cases for bad implementations. 
+ --ref_count; + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clReleaseCommandQueue( + command_queue ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetCommandQueueInfo)( + cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "param_name = %s (%08X)", + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetCommandQueueInfo( + command_queue, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetCommandQueueInfo( + command_queue, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clSetCommandQueueProperty)( + cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties* old_properties ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clSetCommandQueueProperty( + command_queue, + properties, + enable, + old_properties ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clSetCommandQueueProperty( + command_queue, + properties, + enable, + old_properties ); + } +} + +/////////////////////////////////////////////////////////////////////////////// 
+//
+// Pattern shared by the entry points below: fetch the CLIntercept instance
+// with GetIntercept(); when one exists, make the call through
+// pIntercept->dispatch() bracketed by the CALL_LOGGING_*,
+// CPU_PERFORMANCE_TIMING_* and CHECK_ERROR macros; when none exists, pass the
+// arguments unchanged to dummyDispatch.
+//
+// clCreateBuffer: additionally runs the INITIALIZE_BUFFER_CONTENTS_* macros
+// around the call, and ADD_BUFFER / DUMP_BUFFER_AFTER_CREATE on the result.
+CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateBuffer)(
+    cl_context context,
+    cl_mem_flags flags,
+    size_t size,
+    void* host_ptr,
+    cl_int* errcode_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        CALL_LOGGING_ENTER( "flags = %s (%llX), size = %d, host_ptr = %p",
+            pIntercept->enumName().name_mem_flags( flags ).c_str(),
+            flags,
+            size,
+            host_ptr );
+        INITIALIZE_BUFFER_CONTENTS_INIT( flags, size, host_ptr );
+        CHECK_ERROR_INIT( errcode_ret );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_mem retVal = pIntercept->dispatch().clCreateBuffer(
+            context,
+            flags,
+            size,
+            host_ptr,
+            errcode_ret );
+
+        CPU_PERFORMANCE_TIMING_END();
+        ADD_BUFFER( retVal );
+        INITIALIZE_BUFFER_CONTENTS_CLEANUP( flags, host_ptr );
+        DUMP_BUFFER_AFTER_CREATE( retVal, flags, host_ptr, size );
+        CHECK_ERROR( errcode_ret[0] );
+        CALL_LOGGING_EXIT( "returned %p", retVal );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clCreateBuffer(
+            context,
+            flags,
+            size,
+            host_ptr,
+            errcode_ret );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// OpenCL 1.1
+// clCreateSubBuffer: logs the create-info via getCreateSubBufferArgsString()
+// and registers the result with ADD_BUFFER.
+CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateSubBuffer)(
+    cl_mem buffer,
+    cl_mem_flags flags,
+    cl_buffer_create_type buffer_create_type,
+    const void *buffer_create_info,
+    cl_int *errcode_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        std::string argsString;
+        if( pIntercept->callLogging() )
+        {
+            pIntercept->getCreateSubBufferArgsString(
+                buffer_create_type,
+                buffer_create_info,
+                argsString );
+        }
+        CALL_LOGGING_ENTER( "buffer = %p, flags = %s (%llX), %s",
+            buffer,
+            pIntercept->enumName().name_mem_flags( flags ).c_str(),
+            flags,
+            argsString.c_str() );
+        // errcode_ret is read below as errcode_ret[0] and the application may
+        // pass NULL.  Every other create function in this file runs
+        // CHECK_ERROR_INIT first (presumably to substitute a local error code
+        // for a NULL pointer - confirm against the macro definition); this one
+        // was missing it.
+        CHECK_ERROR_INIT( errcode_ret );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_mem retVal = pIntercept->dispatch().clCreateSubBuffer(
+            buffer,
+            flags,
+            buffer_create_type,
+            buffer_create_info,
+            errcode_ret );
+
+        CPU_PERFORMANCE_TIMING_END();
+        ADD_BUFFER( retVal );
+        CHECK_ERROR( errcode_ret[0] );
+        CALL_LOGGING_EXIT( "returned %p", retVal );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clCreateSubBuffer(
+            buffer,
+            flags,
+            buffer_create_type,
+            buffer_create_info,
+            errcode_ret );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// OpenCL 1.2
+// clCreateImage: the format and descriptor fields are logged only when both
+// pointers are non-NULL; the result is registered with ADD_IMAGE.
+CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateImage)(
+    cl_context context,
+    cl_mem_flags flags,
+    const cl_image_format* image_format,
+    const cl_image_desc* image_desc,
+    void* host_ptr,
+    cl_int* errcode_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        if( image_desc && image_format )
+        {
+            CALL_LOGGING_ENTER(
+                "flags = %s (%llX), "
+                "format->channel_order = %s, "
+                "format->channel_data_type = %s, "
+                "desc->type = %s, "
+                "desc->width = %d, "
+                "desc->height = %d, "
+                "desc->depth = %d, "
+                "desc->array_size = %d, "
+                "desc->row_pitch = %d, "
+                "desc->slice_pitch = %d, "
+                "desc->num_mip_levels = %d, "
+                "desc->num_samples = %d, "
+                "desc->mem_object = %p, "
+                "host_ptr = %p ",
+                pIntercept->enumName().name_mem_flags( flags ).c_str(),
+                flags,
+                pIntercept->enumName().name( image_format->image_channel_order ).c_str(),
+                pIntercept->enumName().name( image_format->image_channel_data_type ).c_str(),
+                pIntercept->enumName().name( image_desc->image_type ).c_str(),
+                image_desc->image_width,
+                image_desc->image_height,
+                image_desc->image_depth,
+                image_desc->image_array_size,
+                image_desc->image_row_pitch,
+                image_desc->image_slice_pitch,
+                image_desc->num_mip_levels,
+                image_desc->num_samples,
+                image_desc->mem_object,
+                host_ptr );
+        }
+        else
+        {
+            CALL_LOGGING_ENTER();
+        }
+
+        CHECK_ERROR_INIT( errcode_ret );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_mem retVal = pIntercept->dispatch().clCreateImage(
+            context,
+            flags,
+            image_format,
+            image_desc,
+            host_ptr,
+            errcode_ret );
+
+        CPU_PERFORMANCE_TIMING_END();
+        ADD_IMAGE( retVal );
+        CHECK_ERROR( errcode_ret[0] );
+        CALL_LOGGING_EXIT( "returned %p", retVal );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clCreateImage(
+            context,
+            flags,
+            image_format,
+            image_desc,
+            host_ptr,
+            errcode_ret );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// clCreateImage2D: the format fields are logged only when image_format is
+// non-NULL; the result is registered with ADD_IMAGE.
+CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateImage2D)(
+    cl_context context,
+    cl_mem_flags flags,
+    const cl_image_format* image_format,
+    size_t image_width,
+    size_t image_height,
+    size_t image_row_pitch,
+    void* host_ptr,
+    cl_int* errcode_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        if( image_format )
+        {
+            CALL_LOGGING_ENTER(
+                "flags = %s (%llX), "
+                "format->channel_order = %s, "
+                "format->channel_data_type = %s, "
+                "image_width = %d, "
+                "image_height = %d, "
+                "image_row_pitch = %d, "
+                "host_ptr = %p ",
+                pIntercept->enumName().name_mem_flags( flags ).c_str(),
+                flags,
+                pIntercept->enumName().name( image_format->image_channel_order ).c_str(),
+                pIntercept->enumName().name( image_format->image_channel_data_type ).c_str(),
+                image_width,
+                image_height,
+                image_row_pitch,
+                host_ptr );
+        }
+        else
+        {
+            CALL_LOGGING_ENTER();
+        }
+
+        CHECK_ERROR_INIT( errcode_ret );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_mem retVal = pIntercept->dispatch().clCreateImage2D(
+            context,
+            flags,
+            image_format,
+            image_width,
+            image_height,
+            image_row_pitch,
+            host_ptr,
+            errcode_ret );
+
+        CPU_PERFORMANCE_TIMING_END();
+        ADD_IMAGE( retVal );
+        CHECK_ERROR( errcode_ret[0] );
+        CALL_LOGGING_EXIT( "returned %p", retVal );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clCreateImage2D(
+            context,
+            flags,
+            image_format,
+            image_width,
+            image_height,
+            image_row_pitch,
+            host_ptr,
+            errcode_ret );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// clCreateImage3D: the format fields are logged only when image_format is
+// non-NULL; the result is registered with ADD_IMAGE.
+CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateImage3D)(
+    cl_context context,
+    cl_mem_flags flags,
+    const cl_image_format* image_format,
+    size_t image_width,
+    size_t image_height,
+    size_t image_depth,
+    size_t image_row_pitch,
+    size_t image_slice_pitch,
+    void* host_ptr,
+    cl_int* errcode_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        if( image_format )
+        {
+            // One conversion per argument, in argument order.  The
+            // image_depth slot was previously missing, which shifted every
+            // later value by one and fed a size_t to the final %p.
+            CALL_LOGGING_ENTER(
+                "flags = %s (%llX), "
+                "format->channel_order = %s, "
+                "format->channel_data_type = %s, "
+                "image_width = %d, "
+                "image_height = %d, "
+                "image_depth = %d, "
+                "image_row_pitch = %d, "
+                "image_slice_pitch = %d, "
+                "host_ptr = %p ",
+                pIntercept->enumName().name_mem_flags( flags ).c_str(),
+                flags,
+                pIntercept->enumName().name( image_format->image_channel_order ).c_str(),
+                pIntercept->enumName().name( image_format->image_channel_data_type ).c_str(),
+                image_width,
+                image_height,
+                image_depth,
+                image_row_pitch,
+                image_slice_pitch,
+                host_ptr );
+        }
+        else
+        {
+            CALL_LOGGING_ENTER();
+        }
+
+        CHECK_ERROR_INIT( errcode_ret );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_mem retVal = pIntercept->dispatch().clCreateImage3D(
+            context,
+            flags,
+            image_format,
+            image_width,
+            image_height,
+            image_depth,
+            image_row_pitch,
+            image_slice_pitch,
+            host_ptr,
+            errcode_ret );
+
+        CPU_PERFORMANCE_TIMING_END();
+        ADD_IMAGE( retVal );
+        CHECK_ERROR( errcode_ret[0] );
+        CALL_LOGGING_EXIT( "returned %p", retVal );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clCreateImage3D(
+            context,
+            flags,
+            image_format,
+            image_width,
+            image_height,
+            image_depth,
+            image_row_pitch,
+            image_slice_pitch,
+            host_ptr,
+            errcode_ret );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// clRetainMemObject: when call logging is enabled, CL_MEM_REFERENCE_COUNT is
+// queried before and after the retain so both values appear in the log.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clRetainMemObject)(
+    cl_mem memobj )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        cl_uint ref_count = 0;
+        if( pIntercept->callLogging() )
+        {
+            ref_count = 0;
+            pIntercept->dispatch().clGetMemObjectInfo(
+                memobj,
+                CL_MEM_REFERENCE_COUNT,
+                sizeof( ref_count ),
+                &ref_count,
+                NULL );
+        }
+        CALL_LOGGING_ENTER( "[ ref count = %d ] mem = %p",
+            ref_count,
+            memobj );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clRetainMemObject(
+            memobj );
+
+        CPU_PERFORMANCE_TIMING_END();
+        CHECK_ERROR( retVal );
+        if( pIntercept->callLogging() )
+        {
+            ref_count = 0;
+            pIntercept->dispatch().clGetMemObjectInfo(
+                memobj,
+                CL_MEM_REFERENCE_COUNT,
+                sizeof( ref_count ),
+                &ref_count,
+                NULL );
+        }
+        CALL_LOGGING_EXIT( "[ ref count = %d ]",
+            ref_count );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clRetainMemObject(
+            memobj );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// clReleaseMemObject: REMOVE_MEMOBJ runs before the release.  The reference
+// count is queried only before the release; the value logged on exit is that
+// count minus one.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clReleaseMemObject)(
+    cl_mem memobj )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        REMOVE_MEMOBJ( memobj );
+
+        cl_uint ref_count = 0;
+        if( pIntercept->callLogging() )
+        {
+            ref_count = 0;
+            pIntercept->dispatch().clGetMemObjectInfo(
+                memobj,
+                CL_MEM_REFERENCE_COUNT,
+                sizeof( ref_count ),
+                &ref_count,
+                NULL );
+        }
+        CALL_LOGGING_ENTER( "[ ref count = %d ] mem = %p",
+            ref_count,
+            memobj );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clReleaseMemObject(
+            memobj );
+
+        CPU_PERFORMANCE_TIMING_END();
+        CHECK_ERROR( retVal );
+        if( pIntercept->callLogging() && ref_count != 0 )
+        {
+            // This isn't strictly correct, but it's pretty close, and it
+            // avoids crashes in some cases for bad implementations.
+            --ref_count;
+        }
+        CALL_LOGGING_EXIT( "[ ref count = %d ]",
+            ref_count );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clReleaseMemObject(
+            memobj );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// clGetSupportedImageFormats: plain logged/timed pass-through; logs flags and
+// image_type.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetSupportedImageFormats)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint num_entries,
+    cl_image_format* image_formats,
+    cl_uint* num_image_formats )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        CALL_LOGGING_ENTER( "flags = %s (%llX), image_type = %s (%X)",
+            pIntercept->enumName().name_mem_flags( flags ).c_str(),
+            flags,
+            pIntercept->enumName().name( image_type ).c_str(),
+            image_type );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clGetSupportedImageFormats(
+            context,
+            flags,
+            image_type,
+            num_entries,
+            image_formats,
+            num_image_formats );
+
+        CPU_PERFORMANCE_TIMING_END();
+        CHECK_ERROR( retVal );
+        CALL_LOGGING_EXIT();
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clGetSupportedImageFormats(
+            context,
+            flags,
+            image_type,
+            num_entries,
+            image_formats,
+            num_image_formats );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// clGetMemObjectInfo: plain logged/timed pass-through; logs memobj and
+// param_name.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetMemObjectInfo)(
+    cl_mem memobj,
+    cl_mem_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        CALL_LOGGING_ENTER( "mem = %p, param_name = %s (%08X)",
+            memobj,
+            pIntercept->enumName().name( param_name ).c_str(),
+            param_name );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clGetMemObjectInfo(
+            memobj,
+            param_name,
+            param_value_size,
+            param_value,
+            param_value_size_ret );
+
+        CPU_PERFORMANCE_TIMING_END();
+        CHECK_ERROR( retVal );
+        CALL_LOGGING_EXIT();
+
+        return retVal;
+    }
+    else
{ + return dummyDispatch.clGetMemObjectInfo( + memobj, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetImageInfo)( + cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "mem = %p, param_name = %s (%08X)", + image, + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetImageInfo( + image, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetImageInfo( + image, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.1 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clSetMemObjectDestructorCallback)( + cl_mem memobj, + void (CL_CALLBACK *pfn_notify)( cl_mem, void* ), + void *user_data ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clSetMemObjectDestructorCallback( + memobj, + pfn_notify, + user_data ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clSetMemObjectDestructorCallback( + memobj, + pfn_notify, + user_data ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_sampler CL_API_CALL CLIRN(clCreateSampler)( + cl_context context, + cl_bool normalized_coords, + cl_addressing_mode 
addressing_mode, + cl_filter_mode filter_mode, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + std::string samplerProperties; + if( pIntercept->callLogging() ) + { + cl_sampler_properties sampler_properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, addressing_mode, + CL_SAMPLER_FILTER_MODE, filter_mode, + 0 + }; + pIntercept->getSamplerPropertiesString( + sampler_properties, + samplerProperties ); + } + + CALL_LOGGING_ENTER( "properties = [ %s ]", + samplerProperties.c_str() ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_sampler retVal = pIntercept->dispatch().clCreateSampler( + context, + normalized_coords, + addressing_mode, + filter_mode, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + ADD_SAMPLER(retVal, samplerProperties); + + return retVal; + } + else + { + return dummyDispatch.clCreateSampler( + context, + normalized_coords, + addressing_mode, + filter_mode, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clRetainSampler)( + cl_sampler sampler ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetSamplerInfo( + sampler, + CL_SAMPLER_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] sampler = %p", + ref_count, + sampler ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clRetainSampler( + sampler ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetSamplerInfo( + sampler, + CL_SAMPLER_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL 
); + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clRetainSampler( + sampler ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clReleaseSampler)( + cl_sampler sampler ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + pIntercept->dispatch().clGetSamplerInfo( + sampler, + CL_SAMPLER_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] sampler = %p", + ref_count, + sampler ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clReleaseSampler( + sampler ); + + if ( --ref_count == 0 ) + { + pIntercept->removeSampler( sampler ); + } + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() && ref_count != 0 ) + { + // This isn't strictly correct, but it's pretty close, and it + // avoids crashes in some cases for bad implementations. 
+ --ref_count; + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clReleaseSampler( + sampler ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetSamplerInfo)( + cl_sampler sampler, + cl_sampler_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "param_name = %s (%08X)", + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetSamplerInfo( + sampler, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetSamplerInfo( + sampler, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_program CL_API_CALL CLIRN(clCreateProgramWithSource)( + cl_context context, + cl_uint count, + const char** strings, + const size_t* lengths, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + char* singleString = NULL; + uint64_t hash = 0; + + CREATE_COMBINED_PROGRAM_STRING( count, strings, lengths, singleString, hash ); + INJECT_PROGRAM_SOURCE( count, strings, lengths, singleString, hash ); + PREPEND_PROGRAM_SOURCE( count, strings, lengths, singleString, hash ); + + CALL_LOGGING_ENTER( "context = %p, count = %d", + context, + count ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_program retVal = NULL; + + if( ( retVal == NULL ) && + pIntercept->config().InjectProgramBinaries ) + { + retVal = 
pIntercept->createProgramWithInjectionBinaries( + hash, + context, + errcode_ret ); + } + + if( ( retVal == NULL ) && + pIntercept->config().InjectProgramSPIRV ) + { + retVal = pIntercept->createProgramWithInjectionSPIRV( + hash, + context, + errcode_ret ); + } + + if( retVal == NULL ) + { + retVal = pIntercept->dispatch().clCreateProgramWithSource( + context, + count, + strings, + lengths, + errcode_ret ); + } + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + SIMD_SURVEY_CREATE_PROGRAM_FROM_SOURCE( + retVal, + context, + count, + strings, + lengths ); + CALL_LOGGING_EXIT( "returned %p, program number = %04d", + retVal, + pIntercept->getProgramNumber() ); + + DUMP_PROGRAM_SOURCE( retVal, singleString, hash ); + SAVE_PROGRAM_HASH( retVal, hash ); + DELETE_COMBINED_PROGRAM_STRING( singleString ); + + return retVal; + } + else + { + return dummyDispatch.clCreateProgramWithSource( + context, + count, + strings, + lengths, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_program CL_API_CALL CLIRN(clCreateProgramWithBinary)( + cl_context context, + cl_uint num_devices, + const cl_device_id* device_list, + const size_t* lengths, + const unsigned char** binaries, + cl_int* binary_status, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + uint64_t hash = 0; + + COMPUTE_BINARY_HASH( num_devices, lengths, binaries, hash ); + + CALL_LOGGING_ENTER( "context = %p, num_devices = %d", + context, + num_devices ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_program retVal = NULL; + + if( pIntercept->config().RejectProgramBinaries ) + { + if( errcode_ret != NULL ) + { + errcode_ret[0] = CL_INVALID_BINARY; + } + } + else + { + retVal = pIntercept->dispatch().clCreateProgramWithBinary( + context, + num_devices, + device_list, + lengths, + binaries, + binary_status, + errcode_ret ); + } + + 
CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + DUMP_INPUT_PROGRAM_BINARIES( + retVal, + num_devices, + device_list, + lengths, + binaries, + hash ); + SAVE_PROGRAM_HASH( retVal, hash ); + + return retVal; + } + else + { + return dummyDispatch.clCreateProgramWithBinary( + context, + num_devices, + device_list, + lengths, + binaries, + binary_status, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_program CL_API_CALL CLIRN(clCreateProgramWithBuiltInKernels)( + cl_context context, + cl_uint num_devices, + const cl_device_id* device_list, + const char* kernel_names, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "context = %p, num_devices = %d, kernel_names = [ %s ]", + context, + num_devices, + kernel_names ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_program retVal = NULL; + + if( ( retVal == NULL ) && + pIntercept->config().OverrideBuiltinKernels ) + { + retVal = pIntercept->createProgramWithBuiltinKernels( + context ); + } + + if( retVal == NULL ) + { + retVal = pIntercept->dispatch().clCreateProgramWithBuiltInKernels( + context, + num_devices, + device_list, + kernel_names, + errcode_ret); + } + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clCreateProgramWithBuiltInKernels( + context, + num_devices, + device_list, + kernel_names, + errcode_ret); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clRetainProgram)( + cl_program program ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + 
pIntercept->dispatch().clGetProgramInfo( + program, + CL_PROGRAM_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] program = %p", + ref_count, + program ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clRetainProgram( + program ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetProgramInfo( + program, + CL_PROGRAM_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clRetainProgram( + program ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clReleaseProgram)( + cl_program program ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetProgramInfo( + program, + CL_PROGRAM_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] program = %p", + ref_count, + program ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clReleaseProgram( + program ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() && ref_count != 0 ) + { + // This isn't strictly correct, but it's pretty close, and it + // avoids crashes in some cases for bad implementations. 
+ --ref_count; + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clReleaseProgram( + program ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clBuildProgram)( + cl_program program, + cl_uint num_devices, + const cl_device_id* device_list, + const char* options, + void (CL_CALLBACK *pfn_notify)(cl_program program, void* user_data), + void* user_data ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + char* newOptions = NULL; + + MODIFY_PROGRAM_OPTIONS( program, options, newOptions ); + DUMP_PROGRAM_OPTIONS( program, options ); + + CALL_LOGGING_ENTER( "program = %p, pfn_notify = %p", program, pfn_notify ); + BUILD_LOGGING_INIT(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clBuildProgram( + program, + num_devices, + device_list, + options, + pfn_notify, + user_data ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + BUILD_LOGGING( program, num_devices, device_list ); + SIMD_SURVEY_BUILD_PROGRAM( + program, + num_devices, + device_list, + options ); + CALL_LOGGING_EXIT(); + + DUMP_OUTPUT_PROGRAM_BINARIES( program ); + AUTO_CREATE_SPIRV( program, options ); + INCREMENT_PROGRAM_COMPILE_COUNT( program ); + DELETE_MODIFIED_OPTIONS( newOptions ); + + return retVal; + } + else + { + return dummyDispatch.clBuildProgram( + program, + num_devices, + device_list, + options, + pfn_notify, + user_data ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clCompileProgram)( + cl_program program, + cl_uint num_devices, + const cl_device_id* device_list, + const char* options, + cl_uint num_input_headers, + const cl_program* input_headers, + const char** header_include_names, + void (CL_CALLBACK *pfn_notify)(cl_program program , void* user_data), + void* user_data ) 
+{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + const bool modified = false; + + DUMP_PROGRAM_OPTIONS( program, options ); + + CALL_LOGGING_ENTER(); + BUILD_LOGGING_INIT(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clCompileProgram( + program, + num_devices, + device_list, + options, + num_input_headers, + input_headers, + header_include_names, + pfn_notify, + user_data ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + BUILD_LOGGING( program, num_devices, device_list ); + CALL_LOGGING_EXIT(); + + INCREMENT_PROGRAM_COMPILE_COUNT( program ); + + return retVal; + } + else + { + return dummyDispatch.clCompileProgram( + program, + num_devices, + device_list, + options, + num_input_headers, + input_headers, + header_include_names, + pfn_notify, + user_data ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_program CL_API_CALL CLIRN(clLinkProgram)( + cl_context context, + cl_uint num_devices, + const cl_device_id* device_list, + const char* options, + cl_uint num_input_programs, + const cl_program* input_programs, + void (CL_CALLBACK *pfn_notify)(cl_program program, void* user_data), + void* user_data, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + const bool modified = false; + + CALL_LOGGING_ENTER(); + CHECK_ERROR_INIT( errcode_ret ); + BUILD_LOGGING_INIT(); + CPU_PERFORMANCE_TIMING_START(); + + cl_program retVal = pIntercept->dispatch().clLinkProgram( + context, + num_devices, + device_list, + options, + num_input_programs, + input_programs, + pfn_notify, + user_data, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + BUILD_LOGGING( retVal, num_devices, device_list ); + CALL_LOGGING_EXIT(); + + // TODO: Is the resulting program ("retVal") the one that should be + // used here, to determine the hash for dumped options? 
+ DUMP_PROGRAM_OPTIONS( retVal, options ); + INCREMENT_PROGRAM_COMPILE_COUNT( retVal ); + + return retVal; + } + else + { + return dummyDispatch.clLinkProgram( + context, + num_devices, + device_list, + options, + num_input_programs, + input_programs, + pfn_notify, + user_data, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clSetProgramReleaseCallback)( + cl_program program, + void (CL_CALLBACK *pfn_notify)(cl_program program, void* user_data), + void* user_data ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "program = %p", program ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clSetProgramReleaseCallback( + program, + pfn_notify, + user_data ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clSetProgramReleaseCallback( + program, + pfn_notify, + user_data ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clSetProgramSpecializationConstant)( + cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "program = %p, spec_id = %u, spec_size = %u", program ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clSetProgramSpecializationConstant( + program, + spec_id, + spec_size, + spec_value ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clSetProgramSpecializationConstant( + program, + spec_id, + spec_size, + spec_value ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY 
cl_int CL_API_CALL CLIRN(clUnloadPlatformCompiler)( + cl_platform_id platform ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clUnloadPlatformCompiler( + platform ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clUnloadPlatformCompiler( + platform); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clUnloadCompiler)( void ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clUnloadCompiler(); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clUnloadCompiler(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetProgramInfo)( + cl_program program, + cl_program_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "param_name = %s (%08X)", + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetProgramInfo( + program, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetProgramInfo( + program, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int 
CL_API_CALL CLIRN(clGetProgramBuildInfo)( + cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "param_name = %s (%08X)", + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetProgramBuildInfo( + program, + device, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetProgramBuildInfo( + program, + device, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_kernel CL_API_CALL CLIRN(clCreateKernel)( + cl_program program, + const char* kernel_name, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "program = %p, kernel_name = %s", + program, + kernel_name ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_kernel retVal = NULL; + + if( ( retVal == NULL ) && + pIntercept->config().OverrideBuiltinKernels ) + { + retVal = pIntercept->createBuiltinKernel( + program, + kernel_name, + errcode_ret ); + } + + if( retVal == NULL ) + { + retVal = pIntercept->dispatch().clCreateKernel( + program, + kernel_name, + errcode_ret ); + } + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + SIMD_SURVEY_CREATE_KERNEL( program, retVal, kernel_name ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + if( retVal != NULL ) + { + pIntercept->addKernelName( + retVal, + kernel_name ); + if( pIntercept->config().PreferredWorkGroupSizeMultipleLogging ) + { + 
pIntercept->logPreferredWorkGroupSizeMultiple( + &retVal, + 1 ); + } + } + + return retVal; + } + else + { + return dummyDispatch.clCreateKernel( + program, + kernel_name, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clCreateKernelsInProgram)( + cl_program program, + cl_uint num_kernels, + cl_kernel* kernels, + cl_uint* num_kernels_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint local_num_kernels_ret = 0; + + if( num_kernels_ret == NULL ) + { + num_kernels_ret = &local_num_kernels_ret; + } + + CALL_LOGGING_ENTER( "program = %p", program ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clCreateKernelsInProgram( + program, + num_kernels, + kernels, + num_kernels_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + + std::string retString; + if( pIntercept->callLogging() ) + { + pIntercept->getCreateKernelsInProgramRetString( + retVal, + kernels, + num_kernels_ret, + retString ); + } + CALL_LOGGING_EXIT( "%s", retString.c_str() ); + + if( ( retVal == CL_SUCCESS ) && + ( kernels != NULL ) ) + { + pIntercept->addKernelNames( + kernels, + num_kernels_ret[0] ); + if( pIntercept->config().PreferredWorkGroupSizeMultipleLogging ) + { + pIntercept->logPreferredWorkGroupSizeMultiple( + kernels, + num_kernels_ret[0] ); + } + } + + return retVal; + } + else + { + return dummyDispatch.clCreateKernelsInProgram( + program, + num_kernels, + kernels, + num_kernels_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clRetainKernel)( + cl_kernel kernel ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetKernelInfo( + kernel, + CL_KERNEL_REFERENCE_COUNT, + sizeof( ref_count ), + 
&ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] kernel = %p", + ref_count, + kernel ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clRetainKernel( + kernel ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetKernelInfo( + kernel, + CL_KERNEL_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clRetainKernel( + kernel ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clReleaseKernel)( + cl_kernel kernel ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + pIntercept->removeKernel( kernel ); + + cl_uint ref_count = 0; + if( pIntercept->callLogging() ) + { + ref_count = 0; + pIntercept->dispatch().clGetKernelInfo( + kernel, + CL_KERNEL_REFERENCE_COUNT, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] kernel = %p", + ref_count, + kernel ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clReleaseKernel( + kernel ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() && ref_count != 0 ) + { + // This isn't strictly correct, but it's pretty close, and it + // avoids crashes in some cases for bad implementations. 
+ --ref_count; + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return dummyDispatch.clReleaseKernel( + kernel ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clSetKernelArg)( + cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void* arg_value ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + std::string argsString; + if( pIntercept->callLogging() ) + { + pIntercept->getKernelArgString( + arg_index, + arg_size, + arg_value, + argsString ); + } + CALL_LOGGING_ENTER_KERNEL( + kernel, + "kernel = %p, %s", + kernel, + argsString.c_str() ); + + if ( pIntercept->config().DumpArgumentsOnSet ) + { + pIntercept->dumpArgument( kernel, arg_index, arg_size, arg_value ); + } + + SET_KERNEL_ARG( kernel, arg_index, arg_size, arg_value ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clSetKernelArg( + kernel, + arg_index, + arg_size, + arg_value ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + SIMD_SURVEY_SET_KERNEL_ARG( + kernel, + arg_index, + arg_size, + arg_value ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clSetKernelArg( + kernel, + arg_index, + arg_size, + arg_value ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetKernelInfo)( + cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER_KERNEL( kernel, "param_name = %s (%X)", + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetKernelInfo( + kernel, + param_name, + param_value_size, + param_value, + param_value_size_ret 
); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetKernelInfo( + kernel, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetKernelArgInfo)( + cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER_KERNEL( kernel, "param_name = %s (%X)", + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetKernelArgInfo( + kernel, + arg_indx, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetKernelArgInfo( + kernel, + arg_indx, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetKernelWorkGroupInfo)( + cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER_KERNEL( kernel, "param_name = %s (%X)", + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetKernelWorkGroupInfo( + kernel, + device, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal 
);
+        CALL_LOGGING_EXIT();
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clGetKernelWorkGroupInfo(
+            kernel,
+            device,
+            param_name,
+            param_value_size,
+            param_value,
+            param_value_size_ret );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clWaitForEvents.
+//
+// With an intercept object present, the wait is dispatched only while
+// pIntercept->nullEnqueue() is false; otherwise nothing is dispatched and
+// CL_SUCCESS is returned.  Without an intercept object the call is forwarded
+// to dummyDispatch.clWaitForEvents.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clWaitForEvents)(
+    cl_uint num_events,
+    const cl_event* event_list )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        cl_int retVal = CL_SUCCESS;
+
+        if( pIntercept->nullEnqueue() == false )
+        {
+            // The event list is rendered to text only when call logging is
+            // on; otherwise eventList stays empty and "" is passed below.
+            std::string eventList;
+            if( pIntercept->callLogging() )
+            {
+                pIntercept->getEventListString(
+                    num_events,
+                    event_list,
+                    eventList );
+            }
+            CALL_LOGGING_ENTER( "event_list = %s",
+                eventList.c_str() );
+            CPU_PERFORMANCE_TIMING_START();
+
+            retVal = pIntercept->dispatch().clWaitForEvents(
+                num_events,
+                event_list );
+
+            CPU_PERFORMANCE_TIMING_END();
+            CHECK_ERROR( retVal );
+            CALL_LOGGING_EXIT();
+
+            // NOTE(review): presumably collects device timing for events
+            // that have completed by now - confirm against the macro.
+            DEVICE_PERFORMANCE_TIMING_CHECK();
+        }
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clWaitForEvents(
+            num_events,
+            event_list );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clGetEventInfo.
+//
+// When pIntercept->nullEnqueue() is not false the query is skipped:
+// CL_SUCCESS is returned and neither param_value nor param_value_size_ret
+// is written by this wrapper.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetEventInfo)(
+    cl_event event,
+    cl_event_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        cl_int retVal = CL_SUCCESS;
+
+        if( pIntercept->nullEnqueue() == false )
+        {
+            CALL_LOGGING_ENTER( "event = %p, param_name = %s (%08X)",
+                event,
+                pIntercept->enumName().name( param_name ).c_str(),
+                param_name );
+            CPU_PERFORMANCE_TIMING_START();
+
+            retVal = pIntercept->dispatch().clGetEventInfo(
+                event,
+                param_name,
+                param_value_size,
+                param_value,
+                param_value_size_ret );
+
+            CPU_PERFORMANCE_TIMING_END();
+            CHECK_ERROR( retVal );
+            CALL_LOGGING_EXIT();
+        }
+
+        return retVal;
+    }
+    else
+    {
+        return
dummyDispatch.clGetEventInfo(
+            event,
+            param_name,
+            param_value_size,
+            param_value,
+            param_value_size_ret );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// OpenCL 1.1
+//
+// Exported wrapper for clCreateUserEvent.
+//
+// With an intercept object present, the event is created through
+// pIntercept->dispatch() only while pIntercept->nullEnqueue() is false;
+// otherwise NULL is returned and errcode_ret is left untouched.  Without an
+// intercept object the call is forwarded to dummyDispatch.clCreateUserEvent.
+//
+// errcode_ret may legitimately be NULL.  CHECK_ERROR( errcode_ret[0] )
+// dereferences it, so CHECK_ERROR_INIT( errcode_ret ) is run first, exactly
+// as clEnqueueMapBuffer does in this file before its own
+// CHECK_ERROR( errcode_ret[0] ).
+CL_API_ENTRY cl_event CL_API_CALL CLIRN(clCreateUserEvent)(
+    cl_context context,
+    cl_int *errcode_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        cl_event retVal = NULL;
+
+        if( pIntercept->nullEnqueue() == false )
+        {
+            CALL_LOGGING_ENTER();
+            // Must precede the dispatch so the (possibly substituted)
+            // errcode_ret is the one the implementation writes to.
+            CHECK_ERROR_INIT( errcode_ret );
+            CPU_PERFORMANCE_TIMING_START();
+
+            retVal = pIntercept->dispatch().clCreateUserEvent(
+                context,
+                errcode_ret );
+
+            CPU_PERFORMANCE_TIMING_END();
+            CHECK_ERROR( errcode_ret[0] );
+            CALL_LOGGING_EXIT( "returned %p", retVal );
+        }
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clCreateUserEvent(
+            context,
+            errcode_ret );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clRetainEvent.
+//
+// When call logging is enabled the event's CL_EVENT_REFERENCE_COUNT is
+// queried before and after the retain so both values appear in the log.
+// The status returned by those two queries is ignored.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clRetainEvent)(
+    cl_event event )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        cl_uint ref_count = 0;
+        if( pIntercept->callLogging() )
+        {
+            // Reset first, so 0 is logged if the query writes nothing.
+            ref_count = 0;
+            pIntercept->dispatch().clGetEventInfo(
+                event,
+                CL_EVENT_REFERENCE_COUNT,
+                sizeof( ref_count ),
+                &ref_count,
+                NULL );
+        }
+        CALL_LOGGING_ENTER( "[ ref count = %d ] event = %p",
+            ref_count,
+            event );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clRetainEvent(
+            event );
+
+        CPU_PERFORMANCE_TIMING_END();
+        CHECK_ERROR( retVal );
+        if( pIntercept->callLogging() )
+        {
+            // Query again to log the count after the retain.
+            ref_count = 0;
+            pIntercept->dispatch().clGetEventInfo(
+                event,
+                CL_EVENT_REFERENCE_COUNT,
+                sizeof( ref_count ),
+                &ref_count,
+                NULL );
+        }
+        CALL_LOGGING_EXIT( "[ ref count = %d ]",
+            ref_count );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clRetainEvent(
+            event );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clReleaseEvent.  Unlike clRetainEvent, the reference
+// count is queried only before the call; the value logged on exit is that
+// count decremented locally.
+CL_API_ENTRY cl_int CL_API_CALL
CLIRN(clReleaseEvent)(
+    cl_event event )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        cl_uint ref_count = 0;
+        if( pIntercept->callLogging() )
+        {
+            // Read the count before the release; it is not queried again
+            // afterwards (see the comment further down).
+            ref_count = 0;
+            pIntercept->dispatch().clGetEventInfo(
+                event,
+                CL_EVENT_REFERENCE_COUNT,
+                sizeof( ref_count ),
+                &ref_count,
+                NULL );
+        }
+        CALL_LOGGING_ENTER( "[ ref count = %d ] event = %p",
+            ref_count,
+            event );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clReleaseEvent(
+            event );
+
+        CPU_PERFORMANCE_TIMING_END();
+        CHECK_ERROR( retVal );
+        if( pIntercept->callLogging() && ref_count != 0 )
+        {
+            // This isn't strictly correct, but it's pretty close, and it
+            // avoids crashes in some cases for bad implementations.
+            --ref_count;
+        }
+        CALL_LOGGING_EXIT( "[ ref count = %d ]",
+            ref_count );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clReleaseEvent(
+            event );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// OpenCL 1.1
+//
+// Exported wrapper for clSetUserEventStatus.
+//
+// With an intercept object present the call is bracketed by the logging and
+// CPU timing macros and forwarded to pIntercept->dispatch(); the result is
+// passed to CHECK_ERROR and returned.  Otherwise it is forwarded to
+// dummyDispatch.  CALL_LOGGING_ENTER() is given no arguments here, so
+// neither event nor execution_status is passed to it.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clSetUserEventStatus)(
+    cl_event event,
+    cl_int execution_status )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        CALL_LOGGING_ENTER();
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clSetUserEventStatus(
+            event,
+            execution_status );
+
+        CPU_PERFORMANCE_TIMING_END();
+        CHECK_ERROR( retVal );
+        CALL_LOGGING_EXIT();
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clSetUserEventStatus(
+            event,
+            execution_status );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// OpenCL 1.1
+//
+// Exported wrapper for clSetEventCallback.  The entry log receives the event
+// pointer and the callback type, the latter both as text from
+// enumName().name_command_exec_status() and as a decimal value.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clSetEventCallback)(
+    cl_event event,
+    cl_int command_exec_callback_type,
+    void (CL_CALLBACK *pfn_notify)( cl_event, cl_int, void * ),
+    void *user_data )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        CALL_LOGGING_ENTER( "event = %p, callback_type = %s (%d)",
+            event,
+
pIntercept->enumName().name_command_exec_status( command_exec_callback_type ).c_str(),
+            command_exec_callback_type );
+        // NOTE(review): this macro receives pfn_notify and user_data by name
+        // and may replace them before they are forwarded below - confirm
+        // against its definition.
+        EVENT_CALLBACK_OVERRIDE_INIT( pfn_notify, user_data );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clSetEventCallback(
+            event,
+            command_exec_callback_type,
+            pfn_notify,
+            user_data );
+
+        CPU_PERFORMANCE_TIMING_END();
+        // Cleanup is given the dispatch result and runs before CHECK_ERROR.
+        EVENT_CALLBACK_OVERRIDE_CLEANUP( retVal );
+        CHECK_ERROR( retVal );
+        CALL_LOGGING_EXIT();
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clSetEventCallback(
+            event,
+            command_exec_callback_type,
+            pfn_notify,
+            user_data );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clGetEventProfilingInfo.
+//
+// With an intercept object present, the query is dispatched only while
+// pIntercept->nullEnqueue() is false.  Otherwise CL_SUCCESS is returned and
+// neither param_value nor param_value_size_ret is written by this wrapper.
+// Without an intercept object the call is forwarded to dummyDispatch.
+//
+// The entry log receives only param_name (as text and as hex); the event
+// pointer is not passed to it.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetEventProfilingInfo)(
+    cl_event event,
+    cl_profiling_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        cl_int retVal = CL_SUCCESS;
+
+        if( pIntercept->nullEnqueue() == false )
+        {
+            CALL_LOGGING_ENTER( "param_name = %s (%08X)",
+                pIntercept->enumName().name( param_name ).c_str(),
+                param_name );
+            CPU_PERFORMANCE_TIMING_START();
+
+            retVal = pIntercept->dispatch().clGetEventProfilingInfo(
+                event,
+                param_name,
+                param_value_size,
+                param_value,
+                param_value_size_ret );
+
+            CPU_PERFORMANCE_TIMING_END();
+            CHECK_ERROR( retVal );
+            CALL_LOGGING_EXIT();
+        }
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clGetEventProfilingInfo(
+            event,
+            param_name,
+            param_value_size,
+            param_value,
+            param_value_size_ret );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clFlush.  Always dispatched when an intercept object
+// exists; there is no nullEnqueue() check in this wrapper.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clFlush)(
+    cl_command_queue command_queue )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        CALL_LOGGING_ENTER( "queue = %p", command_queue );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clFlush(
+
command_queue );
+
+        CPU_PERFORMANCE_TIMING_END();
+        CHECK_ERROR( retVal );
+        CALL_LOGGING_EXIT();
+
+        DEVICE_PERFORMANCE_TIMING_CHECK();
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clFlush(
+            command_queue );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clFinish.
+//
+// With an intercept object present the call is logged with the queue
+// pointer, bracketed by the CPU timing macros, and forwarded to
+// pIntercept->dispatch().clFinish; the result is passed to CHECK_ERROR and
+// returned.  Otherwise it is forwarded to dummyDispatch.clFinish.  There is
+// no nullEnqueue() check in this wrapper.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clFinish)(
+    cl_command_queue command_queue )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        CALL_LOGGING_ENTER( "queue = %p", command_queue );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_int retVal = pIntercept->dispatch().clFinish(
+            command_queue );
+
+        CPU_PERFORMANCE_TIMING_END();
+        CHECK_ERROR( retVal );
+        CALL_LOGGING_EXIT();
+
+        // NOTE(review): presumably collects device timing for commands that
+        // have completed once the finish returns - confirm against the macro.
+        DEVICE_PERFORMANCE_TIMING_CHECK();
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clFinish(
+            command_queue );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clEnqueueReadBuffer.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueReadBuffer)(
+    cl_command_queue command_queue,
+    cl_mem buffer,
+    cl_bool blocking_read,
+    size_t offset,
+    size_t cb,
+    void* ptr,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        cl_int retVal = CL_SUCCESS;
+
+        CHECK_AUBCAPTURE_START( command_queue );
+
+        if( pIntercept->nullEnqueue() == false )
+        {
+            // NOTE(review): offset and cb are size_t but are formatted with
+            // %d below; the conversion does not match the argument type.
+            CALL_LOGGING_ENTER(
+                "queue = %p, buffer = %p, %s, offset = %d, cb = %d, ptr = %p",
+                command_queue,
+                buffer,
+                blocking_read ?
"blocking" : "non-blocking", + offset, + cb, + ptr ); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + ITT_ADD_PARAM_AS_METADATA( blocking_read ); + + if( pIntercept->config().OverrideReadBuffer ) + { + retVal = pIntercept->ReadBuffer( + command_queue, + buffer, + blocking_read, + offset, + cb, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } + else + { + retVal = pIntercept->dispatch().clEnqueueReadBuffer( + command_queue, + buffer, + blocking_read, + offset, + cb, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + + if( blocking_read ) + { + DEVICE_PERFORMANCE_TIMING_CHECK(); + } + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueReadBuffer( + command_queue, + buffer, + blocking_read, + offset, + cb, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.1 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueReadBufferRect)( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *ptr, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + if( ( buffer_origin != NULL ) && + ( host_origin != NULL ) && + ( region != NULL ) ) + { + CALL_LOGGING_ENTER( + "queue = %p, buffer = %p, %s, buffer_origin = 
< %d, %d, %d >, host_origin = < %d, %d, %d >, region = < %d, %d, %d >, ptr = %p", + command_queue, + buffer, + blocking_read ? "blocking" : "non-blocking", + buffer_origin[0], buffer_origin[1], buffer_origin[2], + host_origin[0], host_origin[1], host_origin[2], + region[0], region[1], region[2], + ptr ); + } + else + { + CALL_LOGGING_ENTER( + "queue = %p, buffer = %p, %s, ptr = %p", + command_queue, + buffer, + blocking_read ? "blocking" : "non-blocking", + ptr ); + } + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + ITT_ADD_PARAM_AS_METADATA( blocking_read ); + + retVal = pIntercept->dispatch().clEnqueueReadBufferRect( + command_queue, + buffer, + blocking_read, + buffer_origin, + host_origin, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + + if( blocking_read ) + { + DEVICE_PERFORMANCE_TIMING_CHECK(); + } + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueReadBufferRect( + command_queue, + buffer, + blocking_read, + buffer_origin, + host_origin, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueWriteBuffer)( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t cb, + const void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + 
CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER( + "queue = %p, buffer = %p, %s, offset = %d, cb = %d, ptr = %p", + command_queue, + buffer, + blocking_write ? "blocking" : "non-blocking", + offset, + cb, + ptr ); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + ITT_ADD_PARAM_AS_METADATA( blocking_write ); + + if( pIntercept->config().OverrideWriteBuffer ) + { + retVal = pIntercept->WriteBuffer( + command_queue, + buffer, + blocking_write, + offset, + cb, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } + else + { + retVal = pIntercept->dispatch().clEnqueueWriteBuffer( + command_queue, + buffer, + blocking_write, + offset, + cb, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + + if( blocking_write ) + { + DEVICE_PERFORMANCE_TIMING_CHECK(); + } + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueWriteBuffer( + command_queue, + buffer, + blocking_write, + offset, + cb, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.1 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueWriteBufferRect)( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void *ptr, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + 
CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + if( ( buffer_origin != NULL ) && + ( host_origin != NULL ) && + ( region != NULL ) ) + { + CALL_LOGGING_ENTER( + "queue = %p, buffer = %p, %s, buffer_origin = < %d, %d, %d >, host_origin = < %d, %d, %d >, region = < %d, %d, %d >, ptr = %p", + command_queue, + buffer, + blocking_write ? "blocking" : "non-blocking", + buffer_origin[0], buffer_origin[1], buffer_origin[2], + host_origin[0], host_origin[1], host_origin[2], + region[0], region[1], region[2], + ptr ); + } + else + { + CALL_LOGGING_ENTER( + "queue = %p, buffer = %p, %s, ptr = %p", + command_queue, + buffer, + blocking_write ? "blocking" : "non-blocking", + ptr ); + } + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + ITT_ADD_PARAM_AS_METADATA( blocking_write ); + + retVal = pIntercept->dispatch().clEnqueueWriteBufferRect( + command_queue, + buffer, + blocking_write, + buffer_origin, + host_origin, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + + if( blocking_write ) + { + DEVICE_PERFORMANCE_TIMING_CHECK(); + } + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueWriteBufferRect( + command_queue, + buffer, + blocking_write, + buffer_origin, + host_origin, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueFillBuffer)( + cl_command_queue command_queue, + cl_mem buffer, + const void* 
pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueFillBuffer( + command_queue, + buffer, + pattern, + pattern_size, + offset, + size, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueFillBuffer( + command_queue, + buffer, + pattern, + pattern_size, + offset, + size, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueCopyBuffer)( + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t cb, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER("queue = %p, src_buffer = %p, dst_buffer = %p, src_offset = %u, dst_offset = %u, cb = %d", + command_queue, + src_buffer, + dst_buffer, + src_offset, + dst_offset, + cb ); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + if( pIntercept->config().OverrideCopyBuffer ) + { + retVal = 
pIntercept->CopyBuffer( + command_queue, + src_buffer, + dst_buffer, + src_offset, + dst_offset, + cb, + num_events_in_wait_list, + event_wait_list, + event ); + } + else + { + retVal = pIntercept->dispatch().clEnqueueCopyBuffer( + command_queue, + src_buffer, + dst_buffer, + src_offset, + dst_offset, + cb, + num_events_in_wait_list, + event_wait_list, + event ); + } + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueCopyBuffer( + command_queue, + src_buffer, + dst_buffer, + src_offset, + dst_offset, + cb, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.1 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueCopyBufferRect)( + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t *src_origin, + const size_t *dst_origin, + const size_t *region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + if( ( src_origin != NULL ) && + ( dst_origin != NULL ) && + ( region != NULL ) ) + { + CALL_LOGGING_ENTER( + "queue = %p, src_buffer = %p, dst_buffer = %p, src_origin = < %d, %d, %d >, dst_origin = < %d, %d, %d >, region = < %d, %d, %d >", + command_queue, + src_buffer, + dst_buffer, + src_origin[0], src_origin[1], src_origin[2], + dst_origin[0], dst_origin[1], dst_origin[2], + region[0], region[1], region[2] ); + } + else + { + CALL_LOGGING_ENTER( + "queue 
= %p, src_buffer = %p, dst_buffer = %p", + command_queue, + src_buffer, + dst_buffer ); + } + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueCopyBufferRect( + command_queue, + src_buffer, + dst_buffer, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueCopyBufferRect( + command_queue, + src_buffer, + dst_buffer, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueReadImage)( + cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t* origin, + const size_t* region, + size_t row_pitch, + size_t slice_pitch, + void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + if( ( origin != NULL ) && + ( region != NULL ) ) + { + CALL_LOGGING_ENTER( + "queue = %p, image = %p, %s, origin = < %d, %d, %d >, region = < %d, %d, %d >, ptr = %p", + command_queue, + image, + blocking_read ? 
"blocking" : "non-blocking", + origin[0], origin[1], origin[2], + region[0], region[1], region[2], + ptr ); + } + else + { + CALL_LOGGING_ENTER( + "queue = %p, image = %p, %s, ptr = %p", + command_queue, + image, + blocking_read ? "blocking" : "non-blocking", + ptr ); + } + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + ITT_ADD_PARAM_AS_METADATA( blocking_read ); + + if( pIntercept->config().OverrideReadImage ) + { + retVal = pIntercept->ReadImage( + command_queue, + image, + blocking_read, + origin, + region, + row_pitch, + slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } + else + { + retVal = pIntercept->dispatch().clEnqueueReadImage( + command_queue, + image, + blocking_read, + origin, + region, + row_pitch, + slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + + if( blocking_read ) + { + DEVICE_PERFORMANCE_TIMING_CHECK(); + } + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueReadImage( + command_queue, + image, + blocking_read, + origin, + region, + row_pitch, + slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueWriteImage)( + cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t* origin, + const size_t* region, + size_t input_row_pitch, + size_t input_slice_pitch, + const void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + 
if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER( + "%s, ptr = %p", + blocking_write ? "blocking" : "non-blocking", + ptr ); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + ITT_ADD_PARAM_AS_METADATA( blocking_write ); + + if( pIntercept->config().OverrideWriteImage ) + { + retVal = pIntercept->WriteImage( + command_queue, + image, + blocking_write, + origin, + region, + input_row_pitch, + input_slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } + else + { + retVal = pIntercept->dispatch().clEnqueueWriteImage( + command_queue, + image, + blocking_write, + origin, + region, + input_row_pitch, + input_slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + + if( blocking_write ) + { + DEVICE_PERFORMANCE_TIMING_CHECK(); + } + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueWriteImage( + command_queue, + image, + blocking_write, + origin, + region, + input_row_pitch, + input_slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueFillImage)( + cl_command_queue command_queue, + cl_mem image, + const void* fill_color, + const size_t* origin, + const size_t* region, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); 
+ + retVal = pIntercept->dispatch().clEnqueueFillImage( + command_queue, + image, + fill_color, + origin, + region, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueFillImage( + command_queue, + image, + fill_color, + origin, + region, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueCopyImage)( + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + if( pIntercept->config().OverrideCopyImage ) + { + retVal = pIntercept->CopyImage( + command_queue, + src_image, + dst_image, + src_origin, + dst_origin, + region, + num_events_in_wait_list, + event_wait_list, + event ); + } + else + { + retVal = pIntercept->dispatch().clEnqueueCopyImage( + command_queue, + src_image, + dst_image, + src_origin, + dst_origin, + region, + num_events_in_wait_list, + event_wait_list, + event ); + } + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else 
+    {
+        return dummyDispatch.clEnqueueCopyImage(
+            command_queue,
+            src_image,
+            dst_image,
+            src_origin,
+            dst_origin,
+            region,
+            num_events_in_wait_list,
+            event_wait_list,
+            event );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clEnqueueCopyImageToBuffer.
+//
+// With an intercept object present, the enqueue is dispatched only while
+// pIntercept->nullEnqueue() is false; otherwise the dispatch is skipped and
+// CL_SUCCESS is returned.  CHECK_AUBCAPTURE_START, CHECK_AUBCAPTURE_STOP and
+// FINISH_OR_FLUSH_AFTER_ENQUEUE sit outside that check and so run in both
+// cases.  Without an intercept object the call is forwarded to
+// dummyDispatch.clEnqueueCopyImageToBuffer.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueCopyImageToBuffer)(
+    cl_command_queue command_queue,
+    cl_mem src_image,
+    cl_mem dst_buffer,
+    const size_t* src_origin,
+    const size_t* region,
+    size_t dst_offset,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept )
+    {
+        cl_int retVal = CL_SUCCESS;
+
+        CHECK_AUBCAPTURE_START( command_queue );
+
+        if( pIntercept->nullEnqueue() == false )
+        {
+            // No arguments are given to the entry log for this call.
+            CALL_LOGGING_ENTER();
+            // Device timing brackets CPU timing: START before, END after.
+            DEVICE_PERFORMANCE_TIMING_START( event );
+            CPU_PERFORMANCE_TIMING_START();
+
+            retVal = pIntercept->dispatch().clEnqueueCopyImageToBuffer(
+                command_queue,
+                src_image,
+                dst_buffer,
+                src_origin,
+                region,
+                dst_offset,
+                num_events_in_wait_list,
+                event_wait_list,
+                event );
+
+            CPU_PERFORMANCE_TIMING_END();
+            DEVICE_PERFORMANCE_TIMING_END( event );
+            CHECK_ERROR( retVal );
+            CALL_LOGGING_EXIT_EVENT( event );
+        }
+
+        FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue );
+        CHECK_AUBCAPTURE_STOP( command_queue );
+
+        return retVal;
+    }
+    else
+    {
+        return dummyDispatch.clEnqueueCopyImageToBuffer(
+            command_queue,
+            src_image,
+            dst_buffer,
+            src_origin,
+            region,
+            dst_offset,
+            num_events_in_wait_list,
+            event_wait_list,
+            event );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Exported wrapper for clEnqueueCopyBufferToImage.
+CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueCopyBufferToImage)(
+    cl_command_queue command_queue,
+    cl_mem src_buffer,
+    cl_mem dst_image,
+    size_t src_offset,
+    const size_t* dst_origin,
+    const size_t* region,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if(
pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueCopyBufferToImage( + command_queue, + src_buffer, + dst_image, + src_offset, + dst_origin, + region, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueCopyBufferToImage( + command_queue, + src_buffer, + dst_image, + src_offset, + dst_origin, + region, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY void* CL_API_CALL CLIRN(clEnqueueMapBuffer)( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t cb, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + void* retVal = NULL; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + cl_uint map_count = 0; + std::string eventWaitListString; + if( pIntercept->callLogging() ) + { + map_count = 0; + pIntercept->dispatch().clGetMemObjectInfo( + buffer, + CL_MEM_MAP_COUNT, + sizeof( map_count ), + &map_count, + NULL ); + if( num_events_in_wait_list ) + { + std::string eventString; + pIntercept->getEventListString( + num_events_in_wait_list, + event_wait_list, + eventString ); + eventWaitListString += ", event_wait_list = "; + eventWaitListString += eventString; + } + } + 
CALL_LOGGING_ENTER( + "[ map count = %d ] queue = %p, buffer = %p, %s, map_flags = %s (%llX), offset = %d, cb = %d%s", + map_count, + command_queue, + buffer, + blocking_map ? "blocking" : "non-blocking", + pIntercept->enumName().name_map_flags( map_flags ).c_str(), + map_flags, + offset, + cb, + eventWaitListString.c_str() ); + DEVICE_PERFORMANCE_TIMING_START( event ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + ITT_ADD_PARAM_AS_METADATA( blocking_map ); + + retVal = pIntercept->dispatch().clEnqueueMapBuffer( + command_queue, + buffer, + blocking_map, + map_flags, + offset, + cb, + num_events_in_wait_list, + event_wait_list, + event, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + DUMP_BUFFER_AFTER_MAP( command_queue, buffer, blocking_map, map_flags, retVal, offset, cb ); + CHECK_ERROR( errcode_ret[0] ); + if( pIntercept->callLogging() ) + { + map_count = 0; + pIntercept->dispatch().clGetMemObjectInfo( + buffer, + CL_MEM_MAP_COUNT, + sizeof( map_count ), + &map_count, + NULL ); + } + CALL_LOGGING_EXIT_EVENT(event, "[ map count = %d ] returned %p", + map_count, + retVal ); + + if( blocking_map ) + { + DEVICE_PERFORMANCE_TIMING_CHECK(); + } + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueMapBuffer( + command_queue, + buffer, + blocking_map, + map_flags, + offset, + cb, + num_events_in_wait_list, + event_wait_list, + event, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY void* CL_API_CALL CLIRN(clEnqueueMapImage)( + cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t* origin, + const size_t* region, + size_t* image_row_pitch, + size_t* image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event, + 
cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + void* retVal = NULL; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + cl_uint map_count = 0; + if( pIntercept->callLogging() ) + { + map_count = 0; + pIntercept->dispatch().clGetMemObjectInfo( + image, + CL_MEM_MAP_COUNT, + sizeof( map_count ), + &map_count, + NULL ); + } + if( ( origin != NULL ) && + ( region != NULL ) ) + { + CALL_LOGGING_ENTER( + "[ map count = %d ] queue = %p, image = %p, %s, map_flags = %s (%llX), origin = < %d, %d, %d >, region = < %d, %d, %d >", + map_count, + command_queue, + image, + blocking_map ? "blocking" : "non-blocking", + pIntercept->enumName().name_map_flags( map_flags ).c_str(), + map_flags, + origin[0], origin[1], origin[2], + region[0], region[1], region[2] ); + } + else + { + CALL_LOGGING_ENTER( + "[ map count = %d ] queue = %p, image = %p, %s, map_flags = %s (%llX)", + map_count, + command_queue, + image, + blocking_map ? 
"blocking" : "non-blocking", + pIntercept->enumName().name_map_flags( map_flags ).c_str(), + map_flags ); + } + DEVICE_PERFORMANCE_TIMING_START( event ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + ITT_ADD_PARAM_AS_METADATA( blocking_map ); + + retVal = pIntercept->dispatch().clEnqueueMapImage( + command_queue, + image, + blocking_map, + map_flags, + origin, + region, + image_row_pitch, + image_slice_pitch, + num_events_in_wait_list, + event_wait_list, + event, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( errcode_ret[0] ); + if( pIntercept->callLogging() ) + { + map_count = 0; + pIntercept->dispatch().clGetMemObjectInfo( + image, + CL_MEM_MAP_COUNT, + sizeof( map_count ), + &map_count, + NULL ); + } + CALL_LOGGING_EXIT_EVENT(event, "[ map count = %d ] returned %p", + map_count, + retVal ); + + if( blocking_map ) + { + DEVICE_PERFORMANCE_TIMING_CHECK(); + } + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueMapImage( + command_queue, + image, + blocking_map, + map_flags, + origin, + region, + image_row_pitch, + image_slice_pitch, + num_events_in_wait_list, + event_wait_list, + event, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueUnmapMemObject)( + cl_command_queue command_queue, + cl_mem memobj, + void* mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + DUMP_BUFFER_BEFORE_UNMAP( memobj, command_queue ); + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + cl_uint map_count = 0; + std::string eventWaitListString; + if( pIntercept->callLogging() ) + { + 
map_count = 0; + pIntercept->dispatch().clGetMemObjectInfo( + memobj, + CL_MEM_MAP_COUNT, + sizeof( map_count ), + &map_count, + NULL ); + if( num_events_in_wait_list ) + { + std::string eventString; + pIntercept->getEventListString( + num_events_in_wait_list, + event_wait_list, + eventString ); + eventWaitListString += ", event_wait_list = "; + eventWaitListString += eventString; + } + } + CALL_LOGGING_ENTER( + "[ map count = %d ] queue = %p, memobj = %p, mapped_ptr = %p%s", + map_count, + command_queue, + memobj, + mapped_ptr, + eventWaitListString.c_str() ); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueUnmapMemObject( + command_queue, + memobj, + mapped_ptr, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() ) + { + map_count = 0; + pIntercept->dispatch().clGetMemObjectInfo( + memobj, + CL_MEM_MAP_COUNT, + sizeof( map_count ), + &map_count, + NULL ); + } + CALL_LOGGING_EXIT_EVENT(event, "[ map count = %d ]", + map_count ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueUnmapMemObject( + command_queue, + memobj, + mapped_ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueMigrateMemObjects)( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( 
pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueMigrateMemObjects( + command_queue, + num_mem_objects, + mem_objects, + flags, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueMigrateMemObjects( + command_queue, + num_mem_objects, + mem_objects, + flags, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueNDRangeKernel)( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + DUMP_BUFFERS_BEFORE_ENQUEUE( kernel, command_queue ); + DUMP_IMAGES_BEFORE_ENQUEUE( kernel, command_queue ); + CHECK_AUBCAPTURE_START_KERNEL( + kernel, + work_dim, + global_work_size, + local_work_size, + command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + if( pIntercept->config().NullLocalWorkSize ) + { + local_work_size = NULL; + } + pIntercept->overrideNullLocalWorkSize( + work_dim, + global_work_size, + local_work_size ); + + std::string argsString; + if( pIntercept->callLogging() ) + { + pIntercept->getEnqueueNDRangeKernelArgsString( + work_dim, + global_work_offset, + global_work_size, + local_work_size, + argsString ); + if( num_events_in_wait_list ) + { + std::string eventString; + pIntercept->getEventListString( + 
num_events_in_wait_list, + event_wait_list, + eventString ); + argsString += ", event_wait_list = "; + argsString += eventString; + } + } + CALL_LOGGING_ENTER_KERNEL( + kernel, + "queue = %p, kernel = %p, %s", + command_queue, + kernel, + argsString.c_str() ); + + DEVICE_PERFORMANCE_TIMING_START( event ); + SIMD_SURVEY_NDRANGE_KERNEL(kernel); + CPU_PERFORMANCE_TIMING_START(); + +// ITT_ADD_PARAM_AS_METADATA(command_queue); +// ITT_ADD_PARAM_AS_METADATA(kernel); + ITT_ADD_PARAM_AS_METADATA(work_dim); + ITT_ADD_ARRAY_PARAM_AS_METADATA(work_dim, global_work_offset); + ITT_ADD_ARRAY_PARAM_AS_METADATA(work_dim, global_work_size); + ITT_ADD_ARRAY_PARAM_AS_METADATA(work_dim, local_work_size); + ITT_ADD_ARRAY_PARAM_AS_METADATA(num_events_in_wait_list, event_wait_list); + + retVal = CL_INVALID_OPERATION; + + if( ( retVal != CL_SUCCESS ) && + pIntercept->config().OverrideBuiltinKernels ) + { + + retVal = pIntercept->NDRangeBuiltinKernel( + command_queue, + kernel, + work_dim, + global_work_offset, + global_work_size, + local_work_size, + num_events_in_wait_list, + event_wait_list, + event ); + } + + if( retVal != CL_SUCCESS ) + { + retVal = pIntercept->dispatch().clEnqueueNDRangeKernel( + command_queue, + kernel, + work_dim, + global_work_offset, + global_work_size, + local_work_size, + num_events_in_wait_list, + event_wait_list, + event ); + } + + CPU_PERFORMANCE_TIMING_END_KERNEL(kernel); + DEVICE_PERFORMANCE_TIMING_END_KERNEL(event, kernel, work_dim, global_work_size, local_work_size); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + DUMP_BUFFERS_AFTER_ENQUEUE( kernel, command_queue ); + DUMP_IMAGES_AFTER_ENQUEUE( kernel, command_queue ); + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueNDRangeKernel( + command_queue, + kernel, + work_dim, + global_work_offset, + global_work_size, + local_work_size, + num_events_in_wait_list, + event_wait_list, 
+ event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueTask)( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START_KERNEL( kernel, 0, NULL, NULL, command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER_KERNEL( kernel ); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueTask( + command_queue, + kernel, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END_KERNEL(kernel); + DEVICE_PERFORMANCE_TIMING_END_KERNEL(event, kernel, 0, NULL, NULL); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueTask( + command_queue, + kernel, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueNativeKernel)( + cl_command_queue command_queue, + void (CL_CALLBACK *user_func)(void *), + void* args, + size_t cb_args, + cl_uint num_mem_objects, + const cl_mem* mem_list, + const void** args_mem_loc, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = 
pIntercept->dispatch().clEnqueueNativeKernel( + command_queue, + user_func, + args, + cb_args, + num_mem_objects, + mem_list, + args_mem_loc, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueNativeKernel( + command_queue, + user_func, + args, + cb_args, + num_mem_objects, + mem_list, + args_mem_loc, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueMarker)( + cl_command_queue command_queue, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER( "queue = %p", + command_queue ); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueMarker( + command_queue, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueMarker( + command_queue, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueWaitForEvents)( + cl_command_queue command_queue, + cl_uint num_events, + const cl_event* event_list ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( 
command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + std::string eventWaitListString; + if( pIntercept->callLogging() && + num_events ) + { + std::string eventString; + pIntercept->getEventListString( + num_events, + event_list, + eventString ); + eventWaitListString += ", event_list = "; + eventWaitListString += eventString; + } + CALL_LOGGING_ENTER( "queue = %p%s", + command_queue, + eventWaitListString.c_str() ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueWaitForEvents( + command_queue, + num_events, + event_list ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + DEVICE_PERFORMANCE_TIMING_CHECK(); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueWaitForEvents( + command_queue, + num_events, + event_list ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueBarrier)( + cl_command_queue command_queue ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER( "queue = %p", + command_queue ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueBarrier( + command_queue ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + FLUSH_AFTER_ENQUEUE_BARRIER( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueBarrier( + command_queue ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueMarkerWithWaitList)( + cl_command_queue 
command_queue, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + std::string eventWaitListString; + if( pIntercept->callLogging() && + num_events_in_wait_list ) + { + std::string eventString; + pIntercept->getEventListString( + num_events_in_wait_list, + event_wait_list, + eventString ); + eventWaitListString += ", event_wait_list = "; + eventWaitListString += eventString; + } + CALL_LOGGING_ENTER( "queue = %p%s", + command_queue, + eventWaitListString.c_str() ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueMarkerWithWaitList( + command_queue, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueMarkerWithWaitList( + command_queue, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 1.2 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueBarrierWithWaitList)( + cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + std::string eventWaitListString; + if( pIntercept->callLogging() && + num_events_in_wait_list ) + { + std::string eventString; + pIntercept->getEventListString( + num_events_in_wait_list, + event_wait_list, + eventString ); + eventWaitListString += ", 
event_wait_list = "; + eventWaitListString += eventString; + } + CALL_LOGGING_ENTER( "queue = %p%s", + command_queue, + eventWaitListString.c_str() ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueBarrierWithWaitList( + command_queue, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + FLUSH_AFTER_ENQUEUE_BARRIER( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueBarrierWithWaitList( + command_queue, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// Optional? +CL_API_ENTRY void* CL_API_CALL CLIRN(clGetExtensionFunctionAddress)( + const char* func_name ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetExtensionFunctionAddress ) + { + CALL_LOGGING_ENTER( "func_name = %s", func_name ); + CPU_PERFORMANCE_TIMING_START(); + + // First, check to see if this is an extension we know about. + void* retVal = pIntercept->getExtensionFunctionAddress( + NULL, + func_name ); + + // If it's not, call into the dispatch table as usual. + if( retVal == NULL ) + { + retVal = pIntercept->dispatch().clGetExtensionFunctionAddress( + func_name ); + } + + CPU_PERFORMANCE_TIMING_END(); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clGetExtensionFunctionAddress( + func_name ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// Optional? 
+// OpenCL 1.2
+//
+// Intercepted entry point for clGetExtensionFunctionAddressForPlatform.
+//
+// When an intercept object exists and its dispatch table has a non-NULL
+// clGetExtensionFunctionAddressForPlatform entry, the lookup is first handed
+// to pIntercept->getExtensionFunctionAddress(); only if that returns NULL is
+// the dispatch table entry called.  In every other case the call is forwarded
+// to dummyDispatch (defined outside this view).
+//
+// platform  - passed through unchanged to both lookups.
+// func_name - passed through unchanged; also printed with "%s" by the
+//             enter-logging call.
+// Returns the pointer produced by whichever lookup ran last.
+CL_API_ENTRY void* CL_API_CALL CLIRN(clGetExtensionFunctionAddressForPlatform)(
+    cl_platform_id platform,
+    const char* func_name )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept &&
+        pIntercept->dispatch().clGetExtensionFunctionAddressForPlatform )
+    {
+        // getPlatformInfoString() is only called when call logging is on;
+        // otherwise platformInfo stays an empty string.
+        std::string platformInfo;
+        if( pIntercept->callLogging() )
+        {
+            pIntercept->getPlatformInfoString(
+                platform,
+                platformInfo );
+        }
+        CALL_LOGGING_ENTER( "platform = [ %s ], func_name = %s",
+            platformInfo.c_str(),
+            func_name );
+        CPU_PERFORMANCE_TIMING_START();
+
+        // First, check to see if this is an extension we know about.
+        void* retVal = pIntercept->getExtensionFunctionAddress(
+            platform,
+            func_name );
+
+        // If it's not, call into the dispatch table as usual.
+        if( retVal == NULL )
+        {
+            retVal = pIntercept->dispatch().clGetExtensionFunctionAddressForPlatform(
+                platform,
+                func_name );
+        }
+
+        CPU_PERFORMANCE_TIMING_END();
+        CALL_LOGGING_EXIT( "returned %p", retVal );
+
+        return retVal;
+    }
+    else
+    {
+        // No intercept object, or no dispatch entry for this function.
+        return dummyDispatch.clGetExtensionFunctionAddressForPlatform(
+            platform,
+            func_name );
+    }
+}
+
+// CL-GL Sharing
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Intercepted entry point for clCreateFromGLBuffer.
+//
+// With an intercept object and a non-NULL dispatch entry: logs the flags,
+// calls the dispatch entry, hands the result to ADD_BUFFER, checks
+// errcode_ret[0], logs the returned handle and returns it.  Otherwise the
+// call is forwarded to dummyDispatch (defined outside this view).
+//
+// context, flags, bufobj - passed through unchanged.
+// errcode_ret            - passed through to the dispatch entry; declared
+//                          int* here (see the question on the parameter).
+// Returns the cl_mem produced by the dispatch entry.
+CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateFromGLBuffer)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLuint bufobj,
+    int* errcode_ret) // Not cl_int*?
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept &&
+        pIntercept->dispatch().clCreateFromGLBuffer )
+    {
+        CALL_LOGGING_ENTER(
+            "flags = %s (%llX)",
+            pIntercept->enumName().name_mem_flags( flags ).c_str(),
+            flags );
+        // NOTE(review): errcode_ret[0] is read unconditionally further down;
+        // presumably CHECK_ERROR_INIT makes that safe when the caller passes
+        // NULL - confirm against the macro definition.
+        CHECK_ERROR_INIT( errcode_ret );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_mem retVal = pIntercept->dispatch().clCreateFromGLBuffer(
+            context,
+            flags,
+            bufobj,
+            errcode_ret);
+
+        CPU_PERFORMANCE_TIMING_END();
+        // ADD_BUFFER runs before the error check, so it sees retVal whether
+        // or not the call succeeded.
+        ADD_BUFFER( retVal );
+        CHECK_ERROR( errcode_ret[0] );
+        CALL_LOGGING_EXIT( "returned %p", retVal );
+
+        return retVal;
+    }
+    else
+    {
+        // No intercept object, or no dispatch entry for this function.
+        return dummyDispatch.clCreateFromGLBuffer(
+            context,
+            flags,
+            bufobj,
+            errcode_ret);
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Optional?
+// OpenCL 1.2
+//
+// Intercepted entry point for clCreateFromGLTexture.
+//
+// With an intercept object and a non-NULL dispatch entry: logs flags, target,
+// miplevel and texture, calls the dispatch entry, hands the result to
+// ADD_IMAGE, checks errcode_ret[0], calls logCL_GLTextureDetails(), logs the
+// returned handle and returns it.  Otherwise the call is forwarded to
+// dummyDispatch (defined outside this view).
+//
+// context, flags, target, miplevel, texture - passed through unchanged.
+// errcode_ret - passed through to the dispatch entry.
+// Returns the cl_mem produced by the dispatch entry.
+CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateFromGLTexture)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLenum target,
+    cl_GLint miplevel,
+    cl_GLuint texture,
+    cl_int* errcode_ret )
+{
+    CLIntercept* pIntercept = GetIntercept();
+
+    if( pIntercept &&
+        pIntercept->dispatch().clCreateFromGLTexture )
+    {
+        // NOTE(review): target (cl_GLenum) and texture (cl_GLuint) are
+        // printed with "%d"; if those typedefs are unsigned, "%u" would be
+        // the matching specifier - confirm against the GL type definitions.
+        CALL_LOGGING_ENTER(
+            "flags = %s (%llX), "
+            "texture_target = %s (%d), "
+            "miplevel = %d, "
+            "texture = %d",
+            pIntercept->enumName().name_mem_flags( flags ).c_str(),
+            flags,
+            pIntercept->enumName().name_gl( target ).c_str(),
+            target,
+            miplevel,
+            texture );
+
+        // NOTE(review): errcode_ret[0] is read unconditionally further down;
+        // presumably CHECK_ERROR_INIT makes that safe when the caller passes
+        // NULL - confirm against the macro definition.
+        CHECK_ERROR_INIT( errcode_ret );
+        CPU_PERFORMANCE_TIMING_START();
+
+        cl_mem retVal = pIntercept->dispatch().clCreateFromGLTexture(
+            context,
+            flags,
+            target,
+            miplevel,
+            texture,
+            errcode_ret);
+
+        CPU_PERFORMANCE_TIMING_END();
+        ADD_IMAGE( retVal );
+        CHECK_ERROR( errcode_ret[0] );
+
+        // Called unconditionally, i.e. also when the create call reported an
+        // error.
+        pIntercept->logCL_GLTextureDetails( retVal, target, miplevel, texture );
+
+        CALL_LOGGING_EXIT( "returned %p", retVal );
+
+        return retVal;
+    }
+    else
+    {
+        // No intercept object, or no dispatch entry for this function.
+        return dummyDispatch.clCreateFromGLTexture(
+            context,
+            flags,
+            target,
+            miplevel,
+            texture,
+            errcode_ret);
+    }
+}
+
+/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateFromGLTexture2D)( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromGLTexture2D ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX), " + "texture_target = %s (%d), " + "miplevel = %d, " + "texture = %d", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags, + pIntercept->enumName().name_gl( target ).c_str(), + target, + miplevel, + texture ); + + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromGLTexture2D( + context, + flags, + target, + miplevel, + texture, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + + pIntercept->logCL_GLTextureDetails( retVal, target, miplevel, texture ); + + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clCreateFromGLTexture2D( + context, + flags, + target, + miplevel, + texture, + errcode_ret); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateFromGLTexture3D)( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromGLTexture3D ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX), " + "texture_target = %s (%d), " + "miplevel = %d, " + "texture = %d", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags, + pIntercept->enumName().name_gl( target ).c_str(), + target, + miplevel, + texture ); + + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + 
+ cl_mem retVal = pIntercept->dispatch().clCreateFromGLTexture3D( + context, + flags, + target, + miplevel, + texture, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + + pIntercept->logCL_GLTextureDetails( retVal, target, miplevel, texture ); + + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clCreateFromGLTexture3D( + context, + flags, + target, + miplevel, + texture, + errcode_ret); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreateFromGLRenderbuffer)( + cl_context context, + cl_mem_flags flags, + cl_GLuint renderbuffer, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromGLRenderbuffer ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromGLRenderbuffer( + context, + flags, + renderbuffer, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clCreateFromGLRenderbuffer( + context, + flags, + renderbuffer, + errcode_ret); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetGLObjectInfo)( + cl_mem memobj, + cl_gl_object_type* gl_object_type, + cl_GLuint* gl_object_name) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetGLObjectInfo ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetGLObjectInfo( + memobj, + gl_object_type, + gl_object_name); + + 
CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetGLObjectInfo( + memobj, + gl_object_type, + gl_object_name); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetGLTextureInfo)( + cl_mem memobj, + cl_gl_texture_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetGLTextureInfo ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetGLTextureInfo( + memobj, + param_name, + param_value_size, + param_value, + param_value_size_ret); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetGLTextureInfo( + memobj, + param_name, + param_value_size, + param_value, + param_value_size_ret); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueAcquireGLObjects)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueAcquireGLObjects ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueAcquireGLObjects( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( 
retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueAcquireGLObjects( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueReleaseGLObjects)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueReleaseGLObjects ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueReleaseGLObjects( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + DEVICE_PERFORMANCE_TIMING_CHECK(); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueReleaseGLObjects( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY void* CL_API_CALL CLIRN(clSVMAlloc) ( + cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "flags = %s 
(%llX), size = %d, alignment = %d", + pIntercept->enumName().name_svm_mem_flags( flags ).c_str(), + flags, + size, + alignment ); + CPU_PERFORMANCE_TIMING_START(); + + void* retVal = pIntercept->dispatch().clSVMAlloc( + context, + flags, + size, + alignment ); + + CPU_PERFORMANCE_TIMING_END(); + ADD_SVM_ALLOCATION( retVal, size ); + // There is no error code returned from clSVMAlloc(), so strictly + // speaking we have no error to "check" here. Still, we'll invent + // one if clSVMAlloc() returned NULL, so something will get logged + // if ErrorLogging is enabled. + cl_int errorCode = ( retVal != NULL ) ? CL_SUCCESS : CL_INVALID_OPERATION; + CHECK_ERROR( errorCode ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clSVMAlloc( + context, + flags, + size, + alignment ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY void CL_API_CALL CLIRN(clSVMFree) ( + cl_context context, + void* svm_pointer) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "svm_pointer = %p", + svm_pointer ); + CPU_PERFORMANCE_TIMING_START(); + + pIntercept->dispatch().clSVMFree( + context, + svm_pointer ); + + CPU_PERFORMANCE_TIMING_END(); + REMOVE_SVM_ALLOCATION( svm_pointer ); + CALL_LOGGING_EXIT(); + } + else + { + dummyDispatch.clSVMFree( + context, + svm_pointer ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueSVMFree) ( + cl_command_queue command_queue, + cl_uint num_svm_pointers, + void* svm_pointers [], + void (CL_CALLBACK* pfn_free_func)( + cl_command_queue queue, + cl_uint num_svm_pointers, + void* svm_pointers [], + void* user_data ), + void* user_data, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept 
) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueSVMFree( + command_queue, + num_svm_pointers, + svm_pointers, + pfn_free_func, + user_data, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueSVMFree( + command_queue, + num_svm_pointers, + svm_pointers, + pfn_free_func, + user_data, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueSVMMemcpy) ( + cl_command_queue command_queue, + cl_bool blocking_copy, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueSVMMemcpy( + command_queue, + blocking_copy, + dst_ptr, + src_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + 
return dummyDispatch.clEnqueueSVMMemcpy( + command_queue, + blocking_copy, + dst_ptr, + src_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueSVMMemFill) ( + cl_command_queue command_queue, + void* svm_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueSVMMemFill( + command_queue, + svm_ptr, + pattern, + pattern_size, + size, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueSVMMemFill( + command_queue, + svm_ptr, + pattern, + pattern_size, + size, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueSVMMap) ( + cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags map_flags, + void* svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == 
false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueSVMMap( + command_queue, + blocking_map, + map_flags, + svm_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueSVMMap( + command_queue, + blocking_map, + map_flags, + svm_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueSVMUnmap) ( + cl_command_queue command_queue, + void* svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueSVMUnmap( + command_queue, + svm_ptr, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueSVMUnmap( + command_queue, + svm_ptr, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_int 
CL_API_CALL CLIRN(clSetKernelArgSVMPointer) ( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER_KERNEL( + kernel, + "kernel = %p, index = %d, value = %p", + kernel, + arg_index, + arg_value ); + SET_KERNEL_ARG_SVM_POINTER( kernel, arg_index, arg_value ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clSetKernelArgSVMPointer( + kernel, + arg_index, + arg_value ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clSetKernelArgSVMPointer( + kernel, + arg_index, + arg_value ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clSetKernelExecInfo) ( + cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void* param_value) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER_KERNEL( kernel ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clSetKernelExecInfo( + kernel, + param_name, + param_value_size, + param_value ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clSetKernelExecInfo( + kernel, + param_name, + param_value_size, + param_value ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_mem CL_API_CALL CLIRN(clCreatePipe) ( + cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties* properties, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = 
pIntercept->dispatch().clCreatePipe( + context, + flags, + pipe_packet_size, + pipe_max_packets, + properties, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clCreatePipe( + context, + flags, + pipe_packet_size, + pipe_max_packets, + properties, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetPipeInfo) ( + cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER( "mem = %p, param_name = %s (%08X)", + pipe, + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetPipeInfo( + pipe, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetPipeInfo( + pipe, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_command_queue CL_API_CALL CLIRN(clCreateCommandQueueWithProperties) ( + cl_context context, + cl_device_id device, + const cl_queue_properties* properties, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_queue_properties* newProperties = NULL; + cl_command_queue retVal = NULL; + + std::string deviceInfo; + std::string commandQueueProperties; + if( pIntercept->callLogging() ) + { + pIntercept->getDeviceInfoString( + 1, + &device, + deviceInfo ); + pIntercept->getCommandQueuePropertiesString( + properties, + commandQueueProperties 
); + } + CALL_LOGGING_ENTER( "device = [ %s ], properties = [ %s ]", + deviceInfo.c_str(), + commandQueueProperties.c_str() ); + CREATE_COMMAND_QUEUE_OVERRIDE_INIT( properties, newProperties ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + +#if defined(USE_MDAPI) + if( !pIntercept->config().DevicePerfCounterCustom.empty() ) + { + retVal = pIntercept->createMDAPICommandQueue( + context, + device, + properties, + errcode_ret ); + } +#endif + + if( ( retVal == NULL ) && newProperties ) + { + retVal = pIntercept->dispatch().clCreateCommandQueueWithProperties( + context, + device, + newProperties, + errcode_ret ); + } + if( retVal == NULL ) + { + retVal = pIntercept->dispatch().clCreateCommandQueueWithProperties( + context, + device, + properties, + errcode_ret ); + } + + CPU_PERFORMANCE_TIMING_END(); + CREATE_COMMAND_QUEUE_OVERRIDE_CLEANUP( retVal, newProperties ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clCreateCommandQueueWithProperties( + context, + device, + properties, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_create_command_queue +// This function should stay in sync with clCreateCommandQueueWithProperties, above. 
+CL_API_ENTRY cl_command_queue CL_API_CALL CLIRN(clCreateCommandQueueWithPropertiesKHR) ( + cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateCommandQueueWithPropertiesKHR ) + { + cl_queue_properties* newProperties = NULL; + cl_command_queue retVal = NULL; + + std::string deviceInfo; + std::string commandQueueProperties; + if( pIntercept->callLogging() ) + { + pIntercept->getDeviceInfoString( + 1, + &device, + deviceInfo ); + pIntercept->getCommandQueuePropertiesString( + properties, + commandQueueProperties ); + } + CALL_LOGGING_ENTER( "device = [ %s ], properties = [ %s ]", + deviceInfo.c_str(), + commandQueueProperties.c_str() ); + CREATE_COMMAND_QUEUE_OVERRIDE_INIT( properties, newProperties ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + +#if defined(USE_MDAPI) + if( !pIntercept->config().DevicePerfCounterCustom.empty() ) + { + retVal = pIntercept->createMDAPICommandQueue( + context, + device, + properties, + errcode_ret ); + } +#endif + + if( ( retVal == NULL ) && newProperties ) + { + retVal = pIntercept->dispatch().clCreateCommandQueueWithPropertiesKHR( + context, + device, + newProperties, + errcode_ret ); + } + if( retVal == NULL ) + { + retVal = pIntercept->dispatch().clCreateCommandQueueWithPropertiesKHR( + context, + device, + properties, + errcode_ret ); + } + + CPU_PERFORMANCE_TIMING_END(); + CREATE_COMMAND_QUEUE_OVERRIDE_CLEANUP( retVal, newProperties ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.0 +CL_API_ENTRY cl_sampler CL_API_CALL CLIRN(clCreateSamplerWithProperties) ( + cl_context context, + const 
cl_sampler_properties* sampler_properties, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + std::string samplerProperties; + if( pIntercept->callLogging() ) + { + pIntercept->getSamplerPropertiesString( + sampler_properties, + samplerProperties ); + } + CALL_LOGGING_ENTER( "properties = [ %s ]", + samplerProperties.c_str() ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_sampler retVal = pIntercept->dispatch().clCreateSamplerWithProperties( + context, + sampler_properties, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + ADD_SAMPLER( retVal, samplerProperties ); + + return retVal; + } + else + { + return dummyDispatch.clCreateSamplerWithProperties( + context, + sampler_properties, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.1 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clSetDefaultDeviceCommandQueue) ( + cl_context context, + cl_device_id device, + cl_command_queue command_queue ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clSetDefaultDeviceCommandQueue( + context, + device, + command_queue ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clSetDefaultDeviceCommandQueue( + context, + device, + command_queue ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.1 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetDeviceAndHostTimer) ( + cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = 
pIntercept->dispatch().clGetDeviceAndHostTimer( + device, + device_timestamp, + host_timestamp ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetDeviceAndHostTimer( + device, + device_timestamp, + host_timestamp ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.1 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetHostTimer) ( + cl_device_id device, + cl_ulong* host_timestamp ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetHostTimer( + device, + host_timestamp ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetHostTimer( + device, + host_timestamp ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.1 +CL_API_ENTRY cl_program CL_API_CALL CLIRN(clCreateProgramWithIL) ( + cl_context context, + const void* il, + size_t length, + cl_int *errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + char* injectedSPIRV = NULL; + uint64_t hash = 0; + + COMPUTE_SPIRV_HASH( length, il, hash ); + INJECT_PROGRAM_SPIRV( length, il, injectedSPIRV, hash ); + + CALL_LOGGING_ENTER( "context = %p, length = %u", + context, + length ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_program retVal = pIntercept->dispatch().clCreateProgramWithIL( + context, + il, + length, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + DUMP_PROGRAM_SPIRV( retVal, length, il, hash ); + SAVE_PROGRAM_HASH( retVal, hash ); + DELETE_INJECTED_SPIRV( injectedSPIRV ); + + return retVal; + } + else + { + return dummyDispatch.clCreateProgramWithIL( 
+ context, + il, + length, + errcode_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_il_program +// This function should stay in sync with clCreateProgramWithIL, above. +CL_API_ENTRY cl_program CL_API_CALL CLIRN(clCreateProgramWithILKHR) ( + cl_context context, + const void* il, + size_t length, + cl_int *errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateProgramWithILKHR ) + { + char* injectedSPIRV = NULL; + uint64_t hash = 0; + + COMPUTE_SPIRV_HASH( length, il, hash ); + INJECT_PROGRAM_SPIRV( length, il, injectedSPIRV, hash ); + + CALL_LOGGING_ENTER( "context = %p, length = %u", + context, + length ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_program retVal = pIntercept->dispatch().clCreateProgramWithILKHR( + context, + il, + length, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + DUMP_PROGRAM_SPIRV( retVal, length, il, hash ); + SAVE_PROGRAM_HASH( retVal, hash ); + DELETE_INJECTED_SPIRV( injectedSPIRV ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.1 +CL_API_ENTRY cl_kernel CL_API_CALL CLIRN(clCloneKernel) ( + cl_kernel source_kernel, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + CALL_LOGGING_ENTER(); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_kernel retVal = pIntercept->dispatch().clCloneKernel( + source_kernel, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + return dummyDispatch.clCloneKernel( + source_kernel, + errcode_ret ); + } +} + 
+/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.1 +CL_API_ENTRY cl_int CL_API_CALL CLIRN(clGetKernelSubGroupInfo) ( + cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clGetKernelSubGroupInfo( + kernel, + device, + param_name, + input_value_size, + input_value, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return dummyDispatch.clGetKernelSubGroupInfo( + kernel, + device, + param_name, + input_value_size, + input_value, + param_value_size, + param_value, + param_value_size_ret ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_subgroups +// This function should stay in sync with clGetKernelSubGroupInfo, above. 
+CL_API_ENTRY cl_int CL_API_CALL clGetKernelSubGroupInfoKHR( + cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetKernelSubGroupInfoKHR ) + { + cl_int retVal = CL_SUCCESS; + + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clGetKernelSubGroupInfoKHR( + kernel, + device, + param_name, + input_value_size, + input_value, + param_value_size, + param_value, + param_value_size_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// OpenCL 2.1 +CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMigrateMem( + cl_command_queue command_queue, + cl_uint num_svm_pointers, + const void** svm_pointers, + const size_t* sizes, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueSVMMigrateMem( + command_queue, + num_svm_pointers, + svm_pointers, + sizes, + flags, + num_events_in_wait_list, + event_wait_list, + event ); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + + return retVal; + } + else + { + return dummyDispatch.clEnqueueSVMMigrateMem( + 
command_queue, + num_svm_pointers, + svm_pointers, + sizes, + flags, + num_events_in_wait_list, + event_wait_list, + event ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_gl_sharing +CL_API_ENTRY cl_int CL_API_CALL clGetGLContextInfoKHR( + const cl_context_properties *properties, + cl_gl_context_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetGLContextInfoKHR ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetGLContextInfoKHR( + properties, + param_name, + param_value_size, + param_value, + param_value_size_ret); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_gl_event +CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromGLsyncKHR( + cl_context context, + cl_GLsync sync, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateEventFromGLsyncKHR ) + { + CALL_LOGGING_ENTER(); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_event retVal = pIntercept->dispatch().clCreateEventFromGLsyncKHR( + context, + sync, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +#if defined(_WIN32) + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d10_sharing +CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( + cl_platform_id platform, + cl_d3d10_device_source_khr 
d3d_device_source, + void* d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetDeviceIDsFromD3D10KHR ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetDeviceIDsFromD3D10KHR( + platform, + d3d_device_source, + d3d_object, + d3d_device_set, + num_entries, + devices, + num_devices); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d10_sharing +CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10BufferKHR( + cl_context context, + cl_mem_flags flags, + ID3D10Buffer* resource, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromD3D10BufferKHR ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromD3D10BufferKHR( + context, + flags, + resource, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_BUFFER( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d10_sharing +CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( + cl_context context, + cl_mem_flags flags, + ID3D10Texture2D* resource, + UINT subresource, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + 
pIntercept->dispatch().clCreateFromD3D10Texture2DKHR ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromD3D10Texture2DKHR( + context, + flags, + resource, + subresource, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d10_sharing +CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( + cl_context context, + cl_mem_flags flags, + ID3D10Texture3D* resource, + UINT subresource, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromD3D10Texture3DKHR ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromD3D10Texture3DKHR( + context, + flags, + resource, + subresource, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d10_sharing +CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ 
+ CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueAcquireD3D10ObjectsKHR ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueAcquireD3D10ObjectsKHR( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d10_sharing +CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueReleaseD3D10ObjectsKHR ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueReleaseD3D10ObjectsKHR( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + DEVICE_PERFORMANCE_TIMING_CHECK(); + + 
return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d11_sharing +CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( + cl_platform_id platform, + cl_d3d11_device_source_khr d3d_device_source, + void* d3d_object, + cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetDeviceIDsFromD3D11KHR ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetDeviceIDsFromD3D11KHR( + platform, + d3d_device_source, + d3d_object, + d3d_device_set, + num_entries, + devices, + num_devices); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d11_sharing +CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11BufferKHR( + cl_context context, + cl_mem_flags flags, + ID3D11Buffer* resource, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromD3D11BufferKHR ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromD3D11BufferKHR( + context, + flags, + resource, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_BUFFER( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + 
+/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d11_sharing +CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( + cl_context context, + cl_mem_flags flags, + ID3D11Texture2D* resource, + UINT subresource, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromD3D11Texture2DKHR ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromD3D11Texture2DKHR( + context, + flags, + resource, + subresource, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d11_sharing +CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( + cl_context context, + cl_mem_flags flags, + ID3D11Texture3D* resource, + UINT subresource, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromD3D11Texture3DKHR ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromD3D11Texture3DKHR( + context, + flags, + resource, + subresource, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + 
} +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d11_sharing +CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueAcquireD3D11ObjectsKHR ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueAcquireD3D11ObjectsKHR( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_d3d11_sharing +CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueReleaseD3D11ObjectsKHR ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueReleaseD3D11ObjectsKHR( + command_queue, + 
num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + DEVICE_PERFORMANCE_TIMING_CHECK(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_dx9_media_sharing +CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( + cl_platform_id platform, + cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr* media_adapters_type, + void* media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetDeviceIDsFromDX9MediaAdapterKHR ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetDeviceIDsFromDX9MediaAdapterKHR( + platform, + num_media_adapters, + media_adapters_type, + media_adapters, + media_adapter_set, + num_entries, + devices, + num_devices); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_dx9_media_sharing +CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( + cl_context context, + cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, + void* surface_info, + cl_uint plane, + cl_int* errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromDX9MediaSurfaceKHR ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags 
).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromDX9MediaSurfaceKHR( + context, + flags, + adapter_type, + surface_info, + plane, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_dx9_media_sharing +CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueAcquireDX9MediaSurfacesKHR ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueAcquireDX9MediaSurfacesKHR( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_khr_dx9_media_sharing +CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint 
num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueReleaseDX9MediaSurfacesKHR ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueReleaseDX9MediaSurfacesKHR( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + DEVICE_PERFORMANCE_TIMING_CHECK(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} +#endif + +#if defined(_WIN32) +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_dx9_media_sharing Extension +CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9INTEL( + cl_platform_id platform, + cl_dx9_device_source_intel d3d_device_source, + void *dx9_object, + cl_dx9_device_set_intel d3d_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetDeviceIDsFromDX9INTEL ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetDeviceIDsFromDX9INTEL( + platform, + d3d_device_source, + dx9_object, + d3d_device_set, + num_entries, + devices, + num_devices); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// 
cl_intel_dx9_media_sharing Extension +CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromDX9MediaSurfaceINTEL ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_mem retVal = pIntercept->dispatch().clCreateFromDX9MediaSurfaceINTEL( + context, + flags, + resource, + sharedHandle, + plane, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_dx9_media_sharing Extension +CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueAcquireDX9ObjectsINTEL ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueAcquireDX9ObjectsINTEL( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( 
event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_dx9_media_sharing Extension +CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueReleaseDX9ObjectsINTEL ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueReleaseDX9ObjectsINTEL( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + DEVICE_PERFORMANCE_TIMING_CHECK(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} +#endif + +/////////////////////////////////////////////////////////////////////////////// +// +// Unofficial MDAPI extension: +CL_API_ENTRY cl_command_queue CL_API_CALL clCreatePerfCountersCommandQueueINTEL( + cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_uint configuration, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreatePerfCountersCommandQueueINTEL ) + { + // We don't have to do this, since profiling must be enabled + // for a perf counters 
command queue, but it doesn't hurt to + // add it, either. + if( pIntercept->config().DevicePerformanceTiming || + pIntercept->config().ITTPerformanceTiming || + pIntercept->config().ChromePerformanceTiming || + pIntercept->config().SIMDSurvey || + !pIntercept->config().DevicePerfCounterCustom.empty() ) + { + properties |= (cl_command_queue_properties)CL_QUEUE_PROFILING_ENABLE; + } + + CALL_LOGGING_ENTER(); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_command_queue retVal = pIntercept->dispatch().clCreatePerfCountersCommandQueueINTEL( + context, + device, + properties, + configuration, + errcode_ret ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + ITT_REGISTER_COMMAND_QUEUE( retVal, true ); + CHROME_REGISTER_COMMAND_QUEUE( retVal ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// Unofficial MDAPI extension: forwards (device, count, offsets, values) unchanged to the real clSetPerformanceConfigurationINTEL and returns its result. +CL_API_ENTRY cl_int CL_API_CALL clSetPerformanceConfigurationINTEL( + cl_device_id device, + cl_uint count, + cl_uint* offsets, + cl_uint* values ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clSetPerformanceConfigurationINTEL ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clSetPerformanceConfigurationINTEL( + device, + count, + offsets, + values ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else // Unlike the sibling wrappers, which return CL_INVALID_OPERATION here, this path delegates to dummyDispatch. NOTE(review): dummyDispatch is defined outside this view - confirm what its stub returns. + { + return dummyDispatch.clSetPerformanceConfigurationINTEL( + device, + count, + offsets, + values ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_accelerator +CL_API_ENTRY cl_accelerator_intel CL_API_CALL clCreateAcceleratorINTEL( + cl_context context, +
cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateAcceleratorINTEL ) + { + if( ( accelerator_type == CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL ) && + ( descriptor_size >= sizeof( cl_motion_estimation_desc_intel ) ) ) + { + cl_motion_estimation_desc_intel* desc = + (cl_motion_estimation_desc_intel*)descriptor; + CALL_LOGGING_ENTER( "cl_motion_estimation_desc: mb_block_type = %d, subpixel_mode = %d, sad_adjust_mode = %d, search_path_type = %d", + desc->mb_block_type, + desc->subpixel_mode, + desc->sad_adjust_mode, + desc->search_path_type ); + } + else + { + CALL_LOGGING_ENTER( "accelerator_type = %u", accelerator_type ); + } + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + cl_accelerator_intel retVal = pIntercept->dispatch().clCreateAcceleratorINTEL( + context, + accelerator_type, + descriptor_size, + descriptor, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_accelerator: forwards an accelerator info query unchanged to the real implementation and returns its result; returns CL_INVALID_OPERATION when the intercept layer or the extension entry point is unavailable. +CL_API_ENTRY cl_int CL_API_CALL clGetAcceleratorInfoINTEL( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && pIntercept->dispatch().clGetAcceleratorInfoINTEL ) // the dispatch entry may be NULL (the sibling wrappers test theirs the same way); never call through it unchecked + { + CALL_LOGGING_ENTER( "param_name = %s (%X)", + pIntercept->enumName().name( param_name ).c_str(), + param_name ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetAcceleratorInfoINTEL( + accelerator, + param_name, + param_value_size, + param_value, + param_value_size_ret ); + +
CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_accelerator: forwards a retain request to the real implementation and, when call logging is on, logs the reference count queried before and after; returns CL_INVALID_OPERATION when the intercept layer or the retain entry point is unavailable. +CL_API_ENTRY cl_int CL_API_CALL clRetainAcceleratorINTEL( + cl_accelerator_intel accelerator ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && pIntercept->dispatch().clRetainAcceleratorINTEL ) // the dispatch entry may be NULL (the sibling wrappers test theirs the same way) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() && pIntercept->dispatch().clGetAcceleratorInfoINTEL ) // the query only feeds the log, so skip it when its entry point is missing + { + ref_count = 0; + pIntercept->dispatch().clGetAcceleratorInfoINTEL( + accelerator, + CL_ACCELERATOR_REFERENCE_COUNT_INTEL, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] accelerator = %p", + ref_count, + accelerator ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clRetainAcceleratorINTEL( + accelerator ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() && pIntercept->dispatch().clGetAcceleratorInfoINTEL ) // re-query so the exit log shows the count after the retain + { + ref_count = 0; + pIntercept->dispatch().clGetAcceleratorInfoINTEL( + accelerator, + CL_ACCELERATOR_REFERENCE_COUNT_INTEL, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_accelerator: forwards a release request to the real implementation; returns CL_INVALID_OPERATION when the intercept layer or the release entry point is unavailable. +CL_API_ENTRY cl_int CL_API_CALL clReleaseAcceleratorINTEL( + cl_accelerator_intel accelerator ) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && pIntercept->dispatch().clReleaseAcceleratorINTEL ) // the dispatch entry may be NULL (the sibling wrappers test theirs the same way) + { + cl_uint ref_count = 0; + if( pIntercept->callLogging() && pIntercept->dispatch().clGetAcceleratorInfoINTEL ) // the query only feeds the log, so skip it when its entry point is missing + { + ref_count = 0; + pIntercept->dispatch().clGetAcceleratorInfoINTEL( + accelerator, + CL_ACCELERATOR_REFERENCE_COUNT_INTEL, + sizeof( ref_count ), + &ref_count, + NULL ); + } + CALL_LOGGING_ENTER( "[ ref count = %d ] accelerator = %p", + ref_count, + accelerator ); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clReleaseAcceleratorINTEL( +
accelerator ); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + if( pIntercept->callLogging() && ref_count != 0 ) + { + // This isn't strictly correct, but it's pretty close, and it + // avoids crashes in some cases for bad implementations. + --ref_count; + } + CALL_LOGGING_EXIT( "[ ref count = %d ]", + ref_count ); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_va_api_media_sharing +CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromVA_APIMediaAdapterINTEL( + cl_platform_id platform, + cl_va_api_device_source_intel media_adapter_type, + void *media_adapter, + cl_va_api_device_set_intel media_adapter_set, + cl_uint num_entries, + cl_device_id *devices, + cl_uint *num_devices) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clGetDeviceIDsFromVA_APIMediaAdapterINTEL ) + { + CALL_LOGGING_ENTER(); + CPU_PERFORMANCE_TIMING_START(); + + cl_int retVal = pIntercept->dispatch().clGetDeviceIDsFromVA_APIMediaAdapterINTEL( + platform, + media_adapter_type, + media_adapter, + media_adapter_set, + num_entries, + devices, + num_devices); + + CPU_PERFORMANCE_TIMING_END(); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_va_api_media_sharing +CL_API_ENTRY cl_mem CL_API_CALL clCreateFromVA_APIMediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + VASurfaceID *surface, + cl_uint plane, + cl_int *errcode_ret) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clCreateFromVA_APIMediaSurfaceINTEL ) + { + CALL_LOGGING_ENTER( + "flags = %s (%llX)", + pIntercept->enumName().name_mem_flags( flags ).c_str(), + flags ); + CHECK_ERROR_INIT( errcode_ret ); + CPU_PERFORMANCE_TIMING_START(); + + 
cl_mem retVal = pIntercept->dispatch().clCreateFromVA_APIMediaSurfaceINTEL( + context, + flags, + surface, + plane, + errcode_ret); + + CPU_PERFORMANCE_TIMING_END(); + ADD_IMAGE( retVal ); + CHECK_ERROR( errcode_ret[0] ); + CALL_LOGGING_EXIT( "returned %p", retVal ); + + return retVal; + } + else + { + if( errcode_ret ) + { + errcode_ret[0] = CL_INVALID_OPERATION; + } + return NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_va_api_media_sharing +CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireVA_APIMediaSurfacesINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem *mem_objects, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) +{ + CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueAcquireVA_APIMediaSurfacesINTEL ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueAcquireVA_APIMediaSurfacesINTEL( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// cl_intel_va_api_media_sharing +CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseVA_APIMediaSurfacesINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem *mem_objects, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) +{ + 
CLIntercept* pIntercept = GetIntercept(); + + if( pIntercept && + pIntercept->dispatch().clEnqueueReleaseVA_APIMediaSurfacesINTEL ) + { + cl_int retVal = CL_SUCCESS; + + CHECK_AUBCAPTURE_START( command_queue ); + + if( pIntercept->nullEnqueue() == false ) + { + CALL_LOGGING_ENTER(); + DEVICE_PERFORMANCE_TIMING_START( event ); + CPU_PERFORMANCE_TIMING_START(); + + retVal = pIntercept->dispatch().clEnqueueReleaseVA_APIMediaSurfacesINTEL( + command_queue, + num_objects, + mem_objects, + num_events_in_wait_list, + event_wait_list, + event); + + CPU_PERFORMANCE_TIMING_END(); + DEVICE_PERFORMANCE_TIMING_END( event ); + CHECK_ERROR( retVal ); + CALL_LOGGING_EXIT_EVENT( event ); + } + + FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue ); + CHECK_AUBCAPTURE_STOP( command_queue ); + + DEVICE_PERFORMANCE_TIMING_CHECK(); + + return retVal; + } + else + { + return CL_INVALID_OPERATION; + } +} + +#if defined(__APPLE__) +#include "OS/OS_mac_interpose.h" +#endif diff --git a/Src/dispatch.h b/Src/dispatch.h new file mode 100644 index 00000000..fe52c27e --- /dev/null +++ b/Src/dispatch.h @@ -0,0 +1,1220 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#pragma once + +#include "common.h" +#include "cli_ext.h" + +#define CLI_API_ENTRY CL_API_ENTRY +#define CLI_API_CALL CL_API_CALL + +struct CLdispatch +{ + cl_int (CLI_API_CALL *clGetPlatformIDs) ( + cl_uint num_entries, + cl_platform_id* platforms, + cl_uint* num_platforms ); + + cl_int (CLI_API_CALL *clGetPlatformInfo) ( + cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_int (CLI_API_CALL *clGetDeviceIDs) ( + cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices ); + + cl_int (CLI_API_CALL *clGetDeviceInfo) ( + cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clCreateSubDevices) ( + cl_device_id in_device, + const cl_device_partition_property* properties, + cl_uint num_devices, + cl_device_id* out_devices, + cl_uint* num_devices_ret ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clRetainDevice) ( + cl_device_id device ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clReleaseDevice) ( + cl_device_id device ); + + cl_context (CLI_API_CALL *clCreateContext) ( + const cl_context_properties* properties, + cl_uint num_devices, + const cl_device_id* devices, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void* user_data, + cl_int* errcode_ret ); + + cl_context (CLI_API_CALL *clCreateContextFromType) ( + const cl_context_properties* properties, + cl_device_type device_type, + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void* user_data, + cl_int* 
errcode_ret ); + + cl_int (CLI_API_CALL *clRetainContext) ( + cl_context context ); + + cl_int (CLI_API_CALL *clReleaseContext) ( + cl_context context ); + + cl_int (CLI_API_CALL *clGetContextInfo) ( + cl_context context, + cl_context_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_command_queue (CLI_API_CALL *clCreateCommandQueue) ( + cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int* errcode_ret ); + + cl_int (CLI_API_CALL *clRetainCommandQueue) ( + cl_command_queue command_queue ); + + cl_int (CLI_API_CALL *clReleaseCommandQueue) ( + cl_command_queue command_queue ); + + cl_int (CLI_API_CALL *clGetCommandQueueInfo) ( + cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + // deprecated OpenCL 1.0 + cl_int (CLI_API_CALL *clSetCommandQueueProperty) ( + cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties* old_properties ); + + cl_mem (CLI_API_CALL *clCreateBuffer) ( + cl_context context, + cl_mem_flags flags, + size_t size, + void* host_ptr, + cl_int* errcode_ret ); + + // OpenCL 1.1 + cl_mem (CLI_API_CALL *clCreateSubBuffer) ( + cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void *buffer_create_info, + cl_int *errcode_ret ); + + // OpenCL 1.2 + cl_mem (CLI_API_CALL *clCreateImage) ( + cl_context context, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc* image_desc, + void* host_ptr, + cl_int* errcode_ret ); + + // deprecated OpenCL 1.1 + cl_mem (CLI_API_CALL *clCreateImage2D) ( + cl_context context, + cl_mem_flags flags, + const cl_image_format* image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void* host_ptr, + cl_int* errcode_ret ); + + // deprecated OpenCL 1.1 + cl_mem (CLI_API_CALL 
*clCreateImage3D) ( + cl_context context, + cl_mem_flags flags, + const cl_image_format* image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void* host_ptr, + cl_int* errcode_ret ); + + cl_int (CLI_API_CALL *clRetainMemObject) ( + cl_mem memobj ); + + cl_int (CLI_API_CALL *clReleaseMemObject) ( + cl_mem memobj ); + + cl_int (CLI_API_CALL *clGetSupportedImageFormats) ( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format* image_formats, + cl_uint* num_image_formats ); + + cl_int (CLI_API_CALL *clGetMemObjectInfo) ( + cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_int (CLI_API_CALL *clGetImageInfo) ( + cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + // OpenCL 1.1 + cl_int (CLI_API_CALL *clSetMemObjectDestructorCallback) ( + cl_mem memobj, + void (CL_CALLBACK *pfn_notify)( cl_mem, void* ), + void *user_data ); + + cl_sampler (CLI_API_CALL *clCreateSampler) ( + cl_context context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* errcode_ret ); + + cl_int (CLI_API_CALL *clRetainSampler) ( + cl_sampler sampler ); + + cl_int (CLI_API_CALL *clReleaseSampler) ( + cl_sampler sampler ); + + cl_int (CLI_API_CALL *clGetSamplerInfo) ( + cl_sampler sampler, + cl_sampler_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_program (CLI_API_CALL *clCreateProgramWithSource) ( + cl_context context, + cl_uint count, + const char** strings, + const size_t* lengths, + cl_int* errcode_ret ); + + cl_program (CLI_API_CALL *clCreateProgramWithBinary) ( + cl_context context, + cl_uint num_devices, + const cl_device_id* device_list, + const size_t* lengths, + const unsigned 
char** binaries, + cl_int* binary_status, + cl_int* errcode_ret ); + + // OpenCL 1.2 + cl_program (CLI_API_CALL *clCreateProgramWithBuiltInKernels) ( + cl_context context, + cl_uint num_devices, + const cl_device_id* device_list, + const char* kernel_names, + cl_int* errcode_ret); + + cl_int (CLI_API_CALL *clRetainProgram) ( + cl_program program ); + + cl_int (CLI_API_CALL *clReleaseProgram) ( + cl_program program ); + + cl_int (CLI_API_CALL *clBuildProgram) ( + cl_program program, + cl_uint num_devices, + const cl_device_id* device_list, + const char* options, + void (CL_CALLBACK *pfn_notify)(cl_program program, void* user_data), + void* user_data ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clCompileProgram) ( + cl_program program, + cl_uint num_devices, + const cl_device_id* device_list, + const char* options, + cl_uint num_input_headers, + const cl_program* input_headers, + const char** header_include_names, + void (CL_CALLBACK *pfn_notify)(cl_program program , void* user_data), + void* user_data ); + + // OpenCL 1.2 + cl_program (CLI_API_CALL *clLinkProgram) ( + cl_context context, + cl_uint num_devices, + const cl_device_id* device_list, + const char* options, + cl_uint num_input_programs, + const cl_program* input_programs, + void (CL_CALLBACK *pfn_notify)(cl_program program, void* user_data), + void* user_data, + cl_int* errcode_ret ); + + // OpenCL 2.2 + cl_int (CLI_API_CALL *clSetProgramReleaseCallback) ( + cl_program program, + void (CL_CALLBACK *pfn_notify)(cl_program program, void* user_data), + void* user_data ); + + // OpenCL 2.2 + cl_int (CLI_API_CALL *clSetProgramSpecializationConstant) ( + cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clUnloadPlatformCompiler) ( + cl_platform_id platform ); + + // deprecated OpenCL 1.1 + cl_int (CLI_API_CALL *clUnloadCompiler) ( void ); + + cl_int (CLI_API_CALL *clGetProgramInfo) ( + cl_program program, + cl_program_info param_name, 
+ size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_int (CLI_API_CALL *clGetProgramBuildInfo) ( + cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_kernel (CLI_API_CALL *clCreateKernel) ( + cl_program program, + const char* kernel_name, + cl_int* errcode_ret ); + + cl_int (CLI_API_CALL *clCreateKernelsInProgram) ( + cl_program program, + cl_uint num_kernels, + cl_kernel* kernels, + cl_uint* num_kernels_ret ); + + cl_int (CLI_API_CALL *clRetainKernel) ( + cl_kernel kernel ); + + cl_int (CLI_API_CALL *clReleaseKernel) ( + cl_kernel kernel ); + + cl_int (CLI_API_CALL *clSetKernelArg) ( + cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void* arg_value ); + + cl_int (CLI_API_CALL *clGetKernelInfo) ( + cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clGetKernelArgInfo) ( + cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_int (CLI_API_CALL *clGetKernelWorkGroupInfo) ( + cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_int (CLI_API_CALL *clWaitForEvents) ( + cl_uint num_events, + const cl_event* event_list ); + + cl_int (CLI_API_CALL *clGetEventInfo) ( + cl_event event, + cl_event_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + // OpenCL 1.1 + cl_event (CLI_API_CALL *clCreateUserEvent) ( + cl_context context, + cl_int *errcode_ret ); + + cl_int (CLI_API_CALL *clRetainEvent) ( + cl_event event ); + + cl_int (CLI_API_CALL *clReleaseEvent) ( + cl_event event ); + + // OpenCL 1.1 + cl_int (CLI_API_CALL 
*clSetUserEventStatus) ( + cl_event event, + cl_int execution_status ); + + // OpenCL 1.1 + cl_int (CLI_API_CALL *clSetEventCallback) ( + cl_event event, + cl_int command_exec_callback_type, + void (CL_CALLBACK *pfn_notify)( cl_event, cl_int, void * ), + void *user_data ); + + cl_int (CLI_API_CALL *clGetEventProfilingInfo) ( + cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_int (CLI_API_CALL *clFlush) ( + cl_command_queue command_queue ); + + cl_int (CLI_API_CALL *clFinish) ( + cl_command_queue command_queue ); + + cl_int (CLI_API_CALL *clEnqueueReadBuffer) ( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t cb, + void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // OpenCL 1.1 + cl_int (CLI_API_CALL *clEnqueueReadBufferRect) ( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *ptr, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event ); + + cl_int (CLI_API_CALL *clEnqueueWriteBuffer) ( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t cb, + const void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // OpenCL 1.1 + cl_int (CLI_API_CALL *clEnqueueWriteBufferRect) ( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void *ptr, + cl_uint num_events_in_wait_list, + const cl_event 
*event_wait_list, + cl_event *event ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clEnqueueFillBuffer) ( + cl_command_queue command_queue, + cl_mem buffer, + const void* pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + cl_int (CLI_API_CALL *clEnqueueCopyBuffer) ( + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t cb, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // OpenCL 1.1 + cl_int (CLI_API_CALL *clEnqueueCopyBufferRect) ( + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t *src_origin, + const size_t *dst_origin, + const size_t *region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event ); + + cl_int (CLI_API_CALL *clEnqueueReadImage) ( + cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t* origin, + const size_t* region, + size_t row_pitch, + size_t slice_pitch, + void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + cl_int (CLI_API_CALL *clEnqueueWriteImage) ( + cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t* origin, + const size_t* region, + size_t input_row_pitch, + size_t input_slice_pitch, + const void* ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clEnqueueFillImage) ( + cl_command_queue command_queue, + cl_mem image, + const void* fill_color, + const size_t* origin, + const size_t* region, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + cl_int (CLI_API_CALL *clEnqueueCopyImage) ( + 
cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + cl_int (CLI_API_CALL *clEnqueueCopyImageToBuffer) ( + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* region, + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + cl_int (CLI_API_CALL *clEnqueueCopyBufferToImage) ( + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t* dst_origin, + const size_t* region, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + void* (CLI_API_CALL *clEnqueueMapBuffer) ( + cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t cb, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event, + cl_int* errcode_ret ); + + void* (CLI_API_CALL *clEnqueueMapImage) ( + cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t* origin, + const size_t* region, + size_t* image_row_pitch, + size_t* image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event, + cl_int* errcode_ret ); + + cl_int (CLI_API_CALL *clEnqueueUnmapMemObject) ( + cl_command_queue command_queue, + cl_mem memobj, + void* mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clEnqueueMigrateMemObjects) ( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + cl_int 
(CLI_API_CALL *clEnqueueNDRangeKernel) ( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + cl_int (CLI_API_CALL *clEnqueueTask) ( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + cl_int (CLI_API_CALL *clEnqueueNativeKernel) ( + cl_command_queue command_queue, + void (CL_CALLBACK *user_func)(void *), + void* args, + size_t cb_args, + cl_uint num_mem_objects, + const cl_mem* mem_list, + const void** args_mem_loc, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // deprecated OpenCL 1.1 + cl_int (CLI_API_CALL *clEnqueueMarker) ( + cl_command_queue command_queue, + cl_event* event ); + + // deprecated OpenCL 1.1 + cl_int (CLI_API_CALL *clEnqueueWaitForEvents) ( + cl_command_queue command_queue, + cl_uint num_events, + const cl_event* event_list ); + + // deprecated OpenCL 1.1 + cl_int (CLI_API_CALL *clEnqueueBarrier) ( + cl_command_queue command_queue ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clEnqueueMarkerWithWaitList) ( + cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // OpenCL 1.2 + cl_int (CLI_API_CALL *clEnqueueBarrierWithWaitList) ( + cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // Optional? + // deprecated OpenCL 1.1 + void* (CLI_API_CALL *clGetExtensionFunctionAddress) ( + const char* func_name ); + + // Optional? 
+ // OpenCL 1.2 + void* (CLI_API_CALL *clGetExtensionFunctionAddressForPlatform)( + cl_platform_id platform, + const char* func_name ); + + // CL-GL Sharing + + cl_mem (CLI_API_CALL *clCreateFromGLBuffer) ( + cl_context context, + cl_mem_flags flags, + cl_GLuint bufobj, + int* errcode_ret); // Not cl_int*? + + // OpenCL 1.2 + cl_mem (CLI_API_CALL *clCreateFromGLTexture) ( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret ); + + // deprecated OpenCL 1.1 + cl_mem (CLI_API_CALL *clCreateFromGLTexture2D) ( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret); + + // deprecated OpenCL 1.1 + cl_mem (CLI_API_CALL *clCreateFromGLTexture3D) ( + cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int* errcode_ret); + + cl_mem (CLI_API_CALL *clCreateFromGLRenderbuffer) ( + cl_context context, + cl_mem_flags flags, + cl_GLuint renderbuffer, + cl_int* errcode_ret); + + cl_int (CLI_API_CALL *clGetGLObjectInfo) ( + cl_mem memobj, + cl_gl_object_type* gl_object_type, + cl_GLuint* gl_object_name); + + cl_int (CLI_API_CALL *clGetGLTextureInfo) ( + cl_mem memobj, + cl_gl_texture_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + + cl_int (CLI_API_CALL *clEnqueueAcquireGLObjects) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + cl_int (CLI_API_CALL *clEnqueueReleaseGLObjects) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + // OpenCL 2.0 + + void* (CLI_API_CALL *clSVMAlloc) ( + cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment); + + void 
(CLI_API_CALL *clSVMFree) ( + cl_context context, + void* svm_pointer); + + cl_int (CLI_API_CALL *clEnqueueSVMFree) ( + cl_command_queue command_queue, + cl_uint num_svm_pointers, + void* svm_pointers [], + void (CL_CALLBACK* pfn_free_func)( + cl_command_queue queue, + cl_uint num_svm_pointers, + void* svm_pointers [], + void* user_data ), + void* user_data, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + cl_int (CLI_API_CALL *clEnqueueSVMMemcpy) ( + cl_command_queue command_queue, + cl_bool blocking_copy, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + cl_int (CLI_API_CALL *clEnqueueSVMMemFill) ( + cl_command_queue command_queue, + void* svm_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + cl_int (CLI_API_CALL *clEnqueueSVMMap) ( + cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags map_flags, + void* svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + cl_int (CLI_API_CALL *clEnqueueSVMUnmap) ( + cl_command_queue command_queue, + void* svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + cl_int (CLI_API_CALL *clSetKernelArgSVMPointer) ( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value); + + cl_int (CLI_API_CALL *clSetKernelExecInfo) ( + cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void* param_value); + + cl_mem (CLI_API_CALL *clCreatePipe) ( + cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties* properties, + cl_int* errcode_ret); + + cl_int (CLI_API_CALL *clGetPipeInfo) ( + cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void* 
param_value, + size_t* param_value_size_ret); + + cl_command_queue (CLI_API_CALL *clCreateCommandQueueWithProperties) ( + cl_context context, + cl_device_id device, + const cl_queue_properties* properties, + cl_int* errcode_ret); + + cl_sampler (CLI_API_CALL *clCreateSamplerWithProperties) ( + cl_context context, + const cl_sampler_properties* sampler_properties, + cl_int* errcode_ret); + + // OpenCL 2.1 + + cl_int (CLI_API_CALL *clSetDefaultDeviceCommandQueue) ( + cl_context context, + cl_device_id device, + cl_command_queue command_queue ); + + cl_int (CLI_API_CALL *clGetDeviceAndHostTimer) ( + cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp ); + + cl_int (CLI_API_CALL *clGetHostTimer) ( + cl_device_id device, + cl_ulong* host_timestamp ); + + cl_program (CLI_API_CALL *clCreateProgramWithIL) ( + cl_context context, + const void *il, + size_t length, + cl_int *errcode_ret); + + cl_kernel (CLI_API_CALL *clCloneKernel) ( + cl_kernel source_kernel, + cl_int* errcode_ret ); + + cl_int (CLI_API_CALL *clGetKernelSubGroupInfo) ( + cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + + cl_int (CLI_API_CALL *clEnqueueSVMMigrateMem) ( + cl_command_queue command_queue, + cl_uint num_svm_pointers, + const void** svm_pointers, + const size_t* sizes, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // These are Khronos Extensions. + // They aren't exported from the ICD or from this DLL, but we'll still + // put a pointer to them in the CLIntercept dispatch table. 
+ + // cl_khr_gl_sharing + cl_int (CLI_API_CALL *clGetGLContextInfoKHR) ( + const cl_context_properties *properties, + cl_gl_context_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + + // cl_khr_gl_event + cl_event (CLI_API_CALL *clCreateEventFromGLsyncKHR) ( + cl_context context, + cl_GLsync sync, + cl_int* errcode_ret); + +#if defined(_WIN32) + // cl_khr_d3d10_sharing + cl_int (CLI_API_CALL *clGetDeviceIDsFromD3D10KHR) ( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void* d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices); + + // cl_khr_d3d10_sharing + cl_mem (CLI_API_CALL *clCreateFromD3D10BufferKHR) ( + cl_context context, + cl_mem_flags flags, + ID3D10Buffer* resource, + cl_int* errcode_ret); + + // cl_khr_d3d10_sharing + cl_mem (CLI_API_CALL *clCreateFromD3D10Texture2DKHR) ( + cl_context context, + cl_mem_flags flags, + ID3D10Texture2D* resource, + UINT subresource, + cl_int* errcode_ret); + + // cl_khr_d3d10_sharing + cl_mem (CLI_API_CALL *clCreateFromD3D10Texture3DKHR) ( + cl_context context, + cl_mem_flags flags, + ID3D10Texture3D* resource, + UINT subresource, + cl_int* errcode_ret); + + // cl_khr_d3d10_sharing + cl_int (CLI_API_CALL *clEnqueueAcquireD3D10ObjectsKHR) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + // cl_khr_d3d10_sharing + cl_int (CLI_API_CALL *clEnqueueReleaseD3D10ObjectsKHR) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + // cl_khr_d3d11_sharing + cl_int (CLI_API_CALL *clGetDeviceIDsFromD3D11KHR) ( + cl_platform_id platform, + cl_d3d11_device_source_khr d3d_device_source, + void* d3d_object, + cl_d3d11_device_set_khr 
d3d_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices); + + // cl_khr_d3d11_sharing + cl_mem (CLI_API_CALL *clCreateFromD3D11BufferKHR) ( + cl_context context, + cl_mem_flags flags, + ID3D11Buffer* resource, + cl_int* errcode_ret); + + // cl_khr_d3d11_sharing + cl_mem (CLI_API_CALL *clCreateFromD3D11Texture2DKHR) ( + cl_context context, + cl_mem_flags flags, + ID3D11Texture2D* resource, + UINT subresource, + cl_int* errcode_ret); + + // cl_khr_d3d11_sharing + cl_mem (CLI_API_CALL *clCreateFromD3D11Texture3DKHR) ( + cl_context context, + cl_mem_flags flags, + ID3D11Texture3D* resource, + UINT subresource, + cl_int* errcode_ret); + + // cl_khr_d3d11_sharing + cl_int (CLI_API_CALL *clEnqueueAcquireD3D11ObjectsKHR) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + // cl_khr_d3d11_sharing + cl_int (CLI_API_CALL *clEnqueueReleaseD3D11ObjectsKHR) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + // cl_khr_dx9_media_sharing + cl_int (CLI_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR) ( + cl_platform_id platform, + cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr* media_adapters_type, + void* media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices); + + // cl_khr_dx9_media_sharing + cl_mem (CLI_API_CALL *clCreateFromDX9MediaSurfaceKHR) ( + cl_context context, + cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, + void* surface_info, + cl_uint plane, + cl_int* errcode_ret); + + // cl_khr_dx9_media_sharing + cl_int (CLI_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint 
num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + + // cl_khr_dx9_media_sharing + cl_int (CLI_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); +#endif + + // cl_khr_il_program + cl_program (CLI_API_CALL *clCreateProgramWithILKHR) ( + cl_context context, + const void *il, + size_t length, + cl_int *errcode_ret); + + // cl_khr_subgroups + cl_int (CLI_API_CALL *clGetKernelSubGroupInfoKHR) ( + cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + + // cl_khr_create_command_queue + cl_command_queue (CLI_API_CALL *clCreateCommandQueueWithPropertiesKHR) ( + cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret); + + // These are Intel Vendor Extensions. + // They aren't exported from the ICD or from this DLL, but we'll still + // put a pointer to them in the CLIntercept dispatch table. 
+ +#if defined(_WIN32) + // cl_intel_dx9_media_sharing + cl_int (CLI_API_CALL *clGetDeviceIDsFromDX9INTEL) ( + cl_platform_id platform, + cl_dx9_device_source_intel d3d_device_source, + void *dx9_object, + cl_dx9_device_set_intel d3d_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices ); + + // cl_intel_dx9_media_sharing + cl_mem (CLI_API_CALL *clCreateFromDX9MediaSurfaceINTEL) ( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret ); + + // cl_intel_dx9_media_sharing + cl_int (CLI_API_CALL *clEnqueueAcquireDX9ObjectsINTEL) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + // cl_intel_dx9_media_sharing + cl_int (CLI_API_CALL *clEnqueueReleaseDX9ObjectsINTEL) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); +#endif + + // Unofficial MDAPI extension: + cl_command_queue (CLI_API_CALL *clCreatePerfCountersCommandQueueINTEL) ( + cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_uint configuration, + cl_int* errcode_ret); + + // Unofficial MDAPI extension: + cl_int (CLI_API_CALL *clSetPerformanceConfigurationINTEL)( + cl_device_id device, + cl_uint count, + cl_uint* offsets, + cl_uint* values ); + + // cl_intel_accelerator + cl_accelerator_intel (CLI_API_CALL *clCreateAcceleratorINTEL) ( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret ); + + // cl_intel_accelerator + cl_int (CLI_API_CALL *clGetAcceleratorInfoINTEL) ( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); 
+ + // cl_intel_accelerator + cl_int (CLI_API_CALL *clRetainAcceleratorINTEL) ( + cl_accelerator_intel accelerator ); + + // cl_intel_accelerator + cl_int (CLI_API_CALL *clReleaseAcceleratorINTEL) ( + cl_accelerator_intel accelerator ); + + // cl_intel_va_api_media_sharing + cl_int (CLI_API_CALL *clGetDeviceIDsFromVA_APIMediaAdapterINTEL) ( + cl_platform_id platform, + cl_va_api_device_source_intel media_adapter_type, + void *media_adapter, + cl_va_api_device_set_intel media_adapter_set, + cl_uint num_entries, + cl_device_id *devices, + cl_uint *num_devices); + + // cl_intel_va_api_media_sharing + cl_mem (CLI_API_CALL *clCreateFromVA_APIMediaSurfaceINTEL) ( + cl_context context, + cl_mem_flags flags, + VASurfaceID *surface, + cl_uint plane, + cl_int *errcode_ret); + + // cl_intel_va_api_media_sharing + cl_int (CLI_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem *mem_objects, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event); + + // cl_intel_va_api_media_sharing + cl_int (CLI_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL) ( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem *mem_objects, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event); +}; + +extern CLdispatch dummyDispatch; diff --git a/Src/enummap.cpp b/Src/enummap.cpp new file mode 100644 index 00000000..0d30cc12 --- /dev/null +++ b/Src/enummap.cpp @@ -0,0 +1,973 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, 
subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#include "enummap.h" + +#include "common.h" + +/* if( _map.find( _enum ) != _map.end() ) fprintf(stderr, "Already found an entry for %08X (%d): new %s, old %s\n", _enum, _enum, #_enum, _map[ _enum ].c_str() ); \ */ + +#define ADD_ENUM_NAME( _map, _enum ) \ +{ \ + CLI_ASSERT( _map.find( _enum ) == _map.end() ); \ + _map[ _enum ] = #_enum; \ +} + +CEnumNameMap::CEnumNameMap() +{ + /* Error Codes */ + ADD_ENUM_NAME( m_cl_int, CL_SUCCESS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NOT_FOUND ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NOT_AVAILABLE ); + ADD_ENUM_NAME( m_cl_int, CL_COMPILER_NOT_AVAILABLE ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_OBJECT_ALLOCATION_FAILURE ); + ADD_ENUM_NAME( m_cl_int, CL_OUT_OF_RESOURCES ); + ADD_ENUM_NAME( m_cl_int, CL_OUT_OF_HOST_MEMORY ); + ADD_ENUM_NAME( m_cl_int, CL_PROFILING_INFO_NOT_AVAILABLE ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_COPY_OVERLAP ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_FORMAT_MISMATCH ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_FORMAT_NOT_SUPPORTED ); + ADD_ENUM_NAME( m_cl_int, CL_BUILD_PROGRAM_FAILURE ); + ADD_ENUM_NAME( m_cl_int, CL_MAP_FAILURE ); + ADD_ENUM_NAME( m_cl_int, CL_MISALIGNED_SUB_BUFFER_OFFSET ); + ADD_ENUM_NAME( m_cl_int, CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST ); + ADD_ENUM_NAME( m_cl_int, CL_COMPILE_PROGRAM_FAILURE ); + ADD_ENUM_NAME( m_cl_int, 
CL_LINKER_NOT_AVAILABLE ); + ADD_ENUM_NAME( m_cl_int, CL_LINK_PROGRAM_FAILURE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_FAILED ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_INFO_NOT_AVAILABLE ); + + ADD_ENUM_NAME( m_cl_int, CL_INVALID_VALUE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_DEVICE_TYPE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_PLATFORM ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_DEVICE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_CONTEXT ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_QUEUE_PROPERTIES ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_COMMAND_QUEUE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_HOST_PTR ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_MEM_OBJECT ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_IMAGE_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_SAMPLER ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_BINARY ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_BUILD_OPTIONS ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_PROGRAM ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_PROGRAM_EXECUTABLE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_KERNEL_NAME ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_KERNEL_DEFINITION ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_KERNEL ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_ARG_INDEX ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_ARG_VALUE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_ARG_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_KERNEL_ARGS ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_WORK_DIMENSION ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_WORK_GROUP_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_WORK_ITEM_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_GLOBAL_OFFSET ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_EVENT_WAIT_LIST ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_EVENT ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_OPERATION ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_GL_OBJECT ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_BUFFER_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_MIP_LEVEL ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_GLOBAL_WORK_SIZE ); 
+ ADD_ENUM_NAME( m_cl_int, CL_INVALID_PROPERTY ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_IMAGE_DESCRIPTOR ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_COMPILER_OPTIONS ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_LINKER_OPTIONS ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_DEVICE_PARTITION_COUNT ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_PIPE_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_DEVICE_QUEUE ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_SPEC_ID ); + ADD_ENUM_NAME( m_cl_int, CL_MAX_SIZE_RESTRICTION_EXCEEDED ); + + /* OpenCL Version */ + //CL_VERSION_1_0 1 + //CL_VERSION_1_1 1 + //CL_VERSION_1_2 1 + //CL_VERSION_2_0 1 + //CL_VERSION_2_1 1 + //CL_VERSION_2_2 1 + + /* cl_bool */ + ADD_ENUM_NAME( m_cl_bool, CL_FALSE ); + ADD_ENUM_NAME( m_cl_bool, CL_TRUE ); + //CL_BLOCKING CL_TRUE + //CL_NON_BLOCKING CL_FALSE + + /* cl_platform_info */ + ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_PROFILE ); + ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_VERSION ); + ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_NAME ); + ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_VENDOR ); + ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_EXTENSIONS ); + ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_HOST_TIMER_RESOLUTION ); + + /* cl_device_type - bitfield */ + ADD_ENUM_NAME( m_cl_device_type, CL_DEVICE_TYPE_DEFAULT ); + ADD_ENUM_NAME( m_cl_device_type, CL_DEVICE_TYPE_CPU ); + ADD_ENUM_NAME( m_cl_device_type, CL_DEVICE_TYPE_GPU ); + ADD_ENUM_NAME( m_cl_device_type, CL_DEVICE_TYPE_ACCELERATOR ); + ADD_ENUM_NAME( m_cl_device_type, CL_DEVICE_TYPE_CUSTOM ); + ADD_ENUM_NAME( m_cl_device_type, CL_DEVICE_TYPE_ALL ); + + /* cl_device_info */ + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_TYPE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_VENDOR_ID ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_COMPUTE_UNITS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_WORK_GROUP_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_WORK_ITEM_SIZES ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR ); + ADD_ENUM_NAME( m_cl_int, 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_CLOCK_FREQUENCY ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_ADDRESS_BITS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_READ_IMAGE_ARGS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_WRITE_IMAGE_ARGS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_MEM_ALLOC_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE2D_MAX_WIDTH ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE2D_MAX_HEIGHT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE3D_MAX_WIDTH ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE3D_MAX_HEIGHT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE3D_MAX_DEPTH ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE_SUPPORT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_PARAMETER_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_SAMPLERS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MEM_BASE_ADDR_ALIGN ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SINGLE_FP_CONFIG ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GLOBAL_MEM_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_CONSTANT_ARGS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LOCAL_MEM_TYPE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LOCAL_MEM_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_ERROR_CORRECTION_SUPPORT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PROFILING_TIMER_RESOLUTION ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_ENDIAN_LITTLE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_AVAILABLE ); + ADD_ENUM_NAME( m_cl_int, 
CL_DEVICE_COMPILER_AVAILABLE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_EXECUTION_CAPABILITIES ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_QUEUE_PROPERTIES ); + // Same value as CL_DEVICE_QUEUE_PROPERTIES: + //CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NAME ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_VENDOR ); + ADD_ENUM_NAME( m_cl_int, CL_DRIVER_VERSION ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PROFILE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_VERSION ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_EXTENSIONS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PLATFORM ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_DOUBLE_FP_CONFIG ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_HALF_FP_CONFIG ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_HOST_UNIFIED_MEMORY ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_OPENCL_C_VERSION ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LINKER_AVAILABLE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_BUILT_IN_KERNELS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARENT_DEVICE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_MAX_SUB_DEVICES ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_PROPERTIES ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_AFFINITY_DOMAIN ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_TYPE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_REFERENCE_COUNT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC ); 
+ ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PRINTF_BUFFER_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE_PITCH_ALIGNMENT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_ON_DEVICE_QUEUES ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_ON_DEVICE_EVENTS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SVM_CAPABILITIES ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_PIPE_ARGS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PIPE_MAX_PACKET_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_IL_VERSION ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_NUM_SUB_GROUPS ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS ); + + /* cl_device_fp_config - bitfield */ + ADD_ENUM_NAME( m_cl_device_fp_config, CL_FP_DENORM ); + ADD_ENUM_NAME( m_cl_device_fp_config, CL_FP_INF_NAN ); + ADD_ENUM_NAME( m_cl_device_fp_config, CL_FP_ROUND_TO_NEAREST ); + ADD_ENUM_NAME( m_cl_device_fp_config, CL_FP_ROUND_TO_ZERO ); + ADD_ENUM_NAME( m_cl_device_fp_config, CL_FP_ROUND_TO_INF ); + ADD_ENUM_NAME( m_cl_device_fp_config, CL_FP_FMA ); + ADD_ENUM_NAME( m_cl_device_fp_config, CL_FP_SOFT_FLOAT ); + ADD_ENUM_NAME( m_cl_device_fp_config, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ); + + /* cl_device_mem_cache_type */ + ADD_ENUM_NAME( m_cl_device_mem_cache_type, 
CL_NONE ); + ADD_ENUM_NAME( m_cl_device_mem_cache_type, CL_READ_ONLY_CACHE ); + ADD_ENUM_NAME( m_cl_device_mem_cache_type, CL_READ_WRITE_CACHE ); + + /* cl_device_local_mem_type */ + ADD_ENUM_NAME( m_cl_device_local_mem_type, CL_LOCAL ); + ADD_ENUM_NAME( m_cl_device_local_mem_type, CL_GLOBAL ); + + /* cl_device_exec_capabilities - bitfield */ + ADD_ENUM_NAME( m_cl_device_exec_capabilities, CL_EXEC_KERNEL ); + ADD_ENUM_NAME( m_cl_device_exec_capabilities, CL_EXEC_NATIVE_KERNEL ); + + /* cl_command_queue_properties - bitfield */ + ADD_ENUM_NAME( m_cl_command_queue_properties, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ); + ADD_ENUM_NAME( m_cl_command_queue_properties, CL_QUEUE_PROFILING_ENABLE ); + ADD_ENUM_NAME( m_cl_command_queue_properties, CL_QUEUE_ON_DEVICE ); + ADD_ENUM_NAME( m_cl_command_queue_properties, CL_QUEUE_ON_DEVICE_DEFAULT ); + + /* cl_context_info */ + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_REFERENCE_COUNT ); + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_DEVICES ); + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_PROPERTIES ); + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_NUM_DEVICES ); + + /* cl_context_properties */ + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_PLATFORM ); + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_INTEROP_USER_SYNC ); + + /* cl_device_partition_property */ + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_EQUALLY ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_BY_COUNTS ); + //CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN ); + + /* cl_device_affinity_domain */ + ADD_ENUM_NAME( m_cl_device_affinity_domain, CL_DEVICE_AFFINITY_DOMAIN_NUMA ); + ADD_ENUM_NAME( m_cl_device_affinity_domain, CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE ); + ADD_ENUM_NAME( m_cl_device_affinity_domain, CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE ); + ADD_ENUM_NAME( m_cl_device_affinity_domain, CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE ); + ADD_ENUM_NAME( m_cl_device_affinity_domain, CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE ); + ADD_ENUM_NAME( m_cl_device_affinity_domain, 
CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE ); + + /* cl_device_svm_capabilities */ + ADD_ENUM_NAME( m_cl_device_svm_capabilities, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER ); + ADD_ENUM_NAME( m_cl_device_svm_capabilities, CL_DEVICE_SVM_FINE_GRAIN_BUFFER ); + ADD_ENUM_NAME( m_cl_device_svm_capabilities, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM ); + ADD_ENUM_NAME( m_cl_device_svm_capabilities, CL_DEVICE_SVM_ATOMICS ); + + /* cl_command_queue_info */ + ADD_ENUM_NAME( m_cl_int, CL_QUEUE_CONTEXT ); + ADD_ENUM_NAME( m_cl_int, CL_QUEUE_DEVICE ); + ADD_ENUM_NAME( m_cl_int, CL_QUEUE_REFERENCE_COUNT ); + ADD_ENUM_NAME( m_cl_int, CL_QUEUE_PROPERTIES ); + ADD_ENUM_NAME( m_cl_int, CL_QUEUE_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_QUEUE_DEVICE_DEFAULT ); + + /* cl_mem_flags - bitfield */ + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_READ_WRITE ); + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_WRITE_ONLY ); + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_READ_ONLY ); + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_USE_HOST_PTR ); + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_ALLOC_HOST_PTR ); + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_COPY_HOST_PTR ); + // reserved (1 << 6) + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_HOST_WRITE_ONLY ); + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_HOST_READ_ONLY ); + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_HOST_NO_ACCESS ); + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_KERNEL_READ_AND_WRITE ); + + /* cl_mem_migration_flags - bitfield */ + ADD_ENUM_NAME( m_cl_mem_migration_flags, CL_MIGRATE_MEM_OBJECT_HOST ); + ADD_ENUM_NAME( m_cl_mem_migration_flags, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED ); + + /* cl_channel_order */ + ADD_ENUM_NAME( m_cl_int, CL_R ); + ADD_ENUM_NAME( m_cl_int, CL_A ); + ADD_ENUM_NAME( m_cl_int, CL_RG ); + ADD_ENUM_NAME( m_cl_int, CL_RA ); + ADD_ENUM_NAME( m_cl_int, CL_RGB ); + ADD_ENUM_NAME( m_cl_int, CL_RGBA ); + ADD_ENUM_NAME( m_cl_int, CL_BGRA ); + ADD_ENUM_NAME( m_cl_int, CL_ARGB ); + ADD_ENUM_NAME( m_cl_int, CL_INTENSITY ); + ADD_ENUM_NAME( m_cl_int, CL_LUMINANCE ); + ADD_ENUM_NAME( m_cl_int, 
CL_Rx ); + ADD_ENUM_NAME( m_cl_int, CL_RGx ); + ADD_ENUM_NAME( m_cl_int, CL_RGBx ); + ADD_ENUM_NAME( m_cl_int, CL_DEPTH ); + ADD_ENUM_NAME( m_cl_int, CL_DEPTH_STENCIL ); + ADD_ENUM_NAME( m_cl_int, CL_sRGB ); + ADD_ENUM_NAME( m_cl_int, CL_sRGBx ); + ADD_ENUM_NAME( m_cl_int, CL_sRGBA ); + ADD_ENUM_NAME( m_cl_int, CL_sBGRA ); + ADD_ENUM_NAME( m_cl_int, CL_ABGR ); + + /* cl_channel_type */ + ADD_ENUM_NAME( m_cl_int, CL_SNORM_INT8 ); + ADD_ENUM_NAME( m_cl_int, CL_SNORM_INT16 ); + ADD_ENUM_NAME( m_cl_int, CL_UNORM_INT8 ); + ADD_ENUM_NAME( m_cl_int, CL_UNORM_INT16 ); + ADD_ENUM_NAME( m_cl_int, CL_UNORM_SHORT_565 ); + ADD_ENUM_NAME( m_cl_int, CL_UNORM_SHORT_555 ); + ADD_ENUM_NAME( m_cl_int, CL_UNORM_INT_101010 ); + ADD_ENUM_NAME( m_cl_int, CL_SIGNED_INT8 ); + ADD_ENUM_NAME( m_cl_int, CL_SIGNED_INT16 ); + ADD_ENUM_NAME( m_cl_int, CL_SIGNED_INT32 ); + ADD_ENUM_NAME( m_cl_int, CL_UNSIGNED_INT8 ); + ADD_ENUM_NAME( m_cl_int, CL_UNSIGNED_INT16 ); + ADD_ENUM_NAME( m_cl_int, CL_UNSIGNED_INT32 ); + ADD_ENUM_NAME( m_cl_int, CL_HALF_FLOAT ); + ADD_ENUM_NAME( m_cl_int, CL_FLOAT ); + ADD_ENUM_NAME( m_cl_int, CL_UNORM_INT24 ); + ADD_ENUM_NAME( m_cl_int, CL_UNORM_INT_101010_2 ); + + /* cl_mem_object_type */ + ADD_ENUM_NAME( m_cl_int, CL_MEM_OBJECT_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_OBJECT_IMAGE2D ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_OBJECT_IMAGE3D ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_OBJECT_IMAGE2D_ARRAY ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_OBJECT_IMAGE1D ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_OBJECT_IMAGE1D_ARRAY ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_OBJECT_IMAGE1D_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_OBJECT_PIPE ); + + /* cl_mem_info */ + ADD_ENUM_NAME( m_cl_int, CL_MEM_TYPE ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_FLAGS ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_HOST_PTR ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_MAP_COUNT ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_REFERENCE_COUNT ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_CONTEXT ); + ADD_ENUM_NAME( 
m_cl_int, CL_MEM_ASSOCIATED_MEMOBJECT ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_OFFSET ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_USES_SVM_POINTER ); + + /* cl_image_info */ + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_FORMAT ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_ELEMENT_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_ROW_PITCH ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_SLICE_PITCH ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_WIDTH ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_HEIGHT ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_DEPTH ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_ARRAY_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_NUM_MIP_LEVELS ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_NUM_SAMPLES ); + + /* cl_pipe_info */ + ADD_ENUM_NAME( m_cl_int, CL_PIPE_PACKET_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_PIPE_MAX_PACKETS ); + + /* cl_addressing_mode */ + ADD_ENUM_NAME( m_cl_int, CL_ADDRESS_NONE ); + ADD_ENUM_NAME( m_cl_int, CL_ADDRESS_CLAMP_TO_EDGE ); + ADD_ENUM_NAME( m_cl_int, CL_ADDRESS_CLAMP ); + ADD_ENUM_NAME( m_cl_int, CL_ADDRESS_REPEAT ); + ADD_ENUM_NAME( m_cl_int, CL_ADDRESS_MIRRORED_REPEAT ); + + /* cl_filter_mode */ + ADD_ENUM_NAME( m_cl_int, CL_FILTER_NEAREST ); + ADD_ENUM_NAME( m_cl_int, CL_FILTER_LINEAR ); + + /* cl_sampler_info */ + ADD_ENUM_NAME( m_cl_int, CL_SAMPLER_REFERENCE_COUNT ); + ADD_ENUM_NAME( m_cl_int, CL_SAMPLER_CONTEXT ); + ADD_ENUM_NAME( m_cl_int, CL_SAMPLER_NORMALIZED_COORDS ); + ADD_ENUM_NAME( m_cl_int, CL_SAMPLER_ADDRESSING_MODE ); + ADD_ENUM_NAME( m_cl_int, CL_SAMPLER_FILTER_MODE ); + ADD_ENUM_NAME( m_cl_int, CL_SAMPLER_MIP_FILTER_MODE ); + ADD_ENUM_NAME( m_cl_int, CL_SAMPLER_LOD_MIN ); + ADD_ENUM_NAME( m_cl_int, CL_SAMPLER_LOD_MAX ); + + /* cl_map_flags - bitfield */ + ADD_ENUM_NAME( m_cl_map_flags, CL_MAP_READ ); + ADD_ENUM_NAME( m_cl_map_flags, CL_MAP_WRITE ); + ADD_ENUM_NAME( m_cl_map_flags, CL_MAP_WRITE_INVALIDATE_REGION ); + + /* cl_program_info */ + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_REFERENCE_COUNT ); + ADD_ENUM_NAME( m_cl_int, 
CL_PROGRAM_CONTEXT ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_NUM_DEVICES ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_DEVICES ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_SOURCE ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BINARY_SIZES ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BINARIES ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_NUM_KERNELS ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_KERNEL_NAMES ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_IL ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT ); + + /* cl_program_build_info */ + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BUILD_STATUS ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BUILD_OPTIONS ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BUILD_LOG ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BINARY_TYPE ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE ); + + /* cl_program_binary_type */ + ADD_ENUM_NAME( m_cl_program_binary_type, CL_PROGRAM_BINARY_TYPE_NONE ); + ADD_ENUM_NAME( m_cl_program_binary_type, CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT ); + ADD_ENUM_NAME( m_cl_program_binary_type, CL_PROGRAM_BINARY_TYPE_LIBRARY ); + ADD_ENUM_NAME( m_cl_program_binary_type, CL_PROGRAM_BINARY_TYPE_EXECUTABLE ); + + /* cl_build_status */ + ADD_ENUM_NAME( m_cl_build_status, CL_BUILD_SUCCESS ); + ADD_ENUM_NAME( m_cl_build_status, CL_BUILD_NONE ); + ADD_ENUM_NAME( m_cl_build_status, CL_BUILD_ERROR ); + ADD_ENUM_NAME( m_cl_build_status, CL_BUILD_IN_PROGRESS ); + + /* cl_kernel_info */ + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_FUNCTION_NAME ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_NUM_ARGS ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_REFERENCE_COUNT ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_CONTEXT ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_PROGRAM ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ATTRIBUTES ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_MAX_NUM_SUB_GROUPS ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_COMPILE_NUM_SUB_GROUPS ); + + /* cl_kernel_arg_info */ + ADD_ENUM_NAME( m_cl_int, 
CL_KERNEL_ARG_ADDRESS_QUALIFIER ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_ACCESS_QUALIFIER ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_TYPE_NAME ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_TYPE_QUALIFIER ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_NAME ); + + /* cl_kernel_arg_address_qualifier */ + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_ADDRESS_GLOBAL ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_ADDRESS_LOCAL ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_ADDRESS_CONSTANT ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_ADDRESS_PRIVATE ); + + /* cl_kernel_arg_access_qualifier */ + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_ACCESS_READ_ONLY ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_ACCESS_WRITE_ONLY ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_ACCESS_READ_WRITE ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_ARG_ACCESS_NONE ); + + /* cl_kernel_arg_type_qualifer */ + ADD_ENUM_NAME( m_cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_NONE ); + ADD_ENUM_NAME( m_cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_CONST ); + ADD_ENUM_NAME( m_cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_RESTRICT ); + ADD_ENUM_NAME( m_cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_VOLATILE ); + ADD_ENUM_NAME( m_cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_PIPE ); + + /* cl_kernel_work_group_info */ + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_WORK_GROUP_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_COMPILE_WORK_GROUP_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_LOCAL_MEM_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_PRIVATE_MEM_SIZE ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_GLOBAL_WORK_SIZE ); + + /* cl_kernel_sub_group_info */ + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT ); + + /* cl_kernel_exec_info */ + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_EXEC_INFO_SVM_PTRS ); + ADD_ENUM_NAME( 
m_cl_int, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM ); + + /* cl_event_info */ + ADD_ENUM_NAME( m_cl_int, CL_EVENT_COMMAND_QUEUE ); + ADD_ENUM_NAME( m_cl_int, CL_EVENT_COMMAND_TYPE ); + ADD_ENUM_NAME( m_cl_int, CL_EVENT_REFERENCE_COUNT ); + ADD_ENUM_NAME( m_cl_int, CL_EVENT_COMMAND_EXECUTION_STATUS ); + ADD_ENUM_NAME( m_cl_int, CL_EVENT_CONTEXT ); + + /* cl_command_type */ + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_NDRANGE_KERNEL ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_TASK ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_NATIVE_KERNEL ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_READ_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_WRITE_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_COPY_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_READ_IMAGE ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_WRITE_IMAGE ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_COPY_IMAGE ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_COPY_IMAGE_TO_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_COPY_BUFFER_TO_IMAGE ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_MAP_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_MAP_IMAGE ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_UNMAP_MEM_OBJECT ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_MARKER ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_ACQUIRE_GL_OBJECTS ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_RELEASE_GL_OBJECTS ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_READ_BUFFER_RECT ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_WRITE_BUFFER_RECT ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_COPY_BUFFER_RECT ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_USER ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_BARRIER ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_MIGRATE_MEM_OBJECTS ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_FILL_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_FILL_IMAGE ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_SVM_FREE ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_SVM_MEMCPY ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_SVM_MEMFILL ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_SVM_MAP ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_SVM_UNMAP ); + + 
/* command execution status */ + ADD_ENUM_NAME( m_cl_command_exec_status, CL_COMPLETE ); + ADD_ENUM_NAME( m_cl_command_exec_status, CL_RUNNING ); + ADD_ENUM_NAME( m_cl_command_exec_status, CL_SUBMITTED ); + ADD_ENUM_NAME( m_cl_command_exec_status, CL_QUEUED ); + + /* cl_buffer_create_type */ + ADD_ENUM_NAME( m_cl_int, CL_BUFFER_CREATE_TYPE_REGION ); + + /* cl_profiling_info */ + ADD_ENUM_NAME( m_cl_int, CL_PROFILING_COMMAND_QUEUED ); + ADD_ENUM_NAME( m_cl_int, CL_PROFILING_COMMAND_SUBMIT ); + ADD_ENUM_NAME( m_cl_int, CL_PROFILING_COMMAND_START ); + ADD_ENUM_NAME( m_cl_int, CL_PROFILING_COMMAND_END ); + ADD_ENUM_NAME( m_cl_int, CL_PROFILING_COMMAND_COMPLETE ); + + /* cl_svm_mem_flags */ + ADD_ENUM_NAME( m_cl_svm_mem_flags, CL_MEM_READ_WRITE ); + ADD_ENUM_NAME( m_cl_svm_mem_flags, CL_MEM_WRITE_ONLY ); + ADD_ENUM_NAME( m_cl_svm_mem_flags, CL_MEM_READ_ONLY ); + ADD_ENUM_NAME( m_cl_svm_mem_flags, CL_MEM_SVM_FINE_GRAIN_BUFFER ); + ADD_ENUM_NAME( m_cl_svm_mem_flags, CL_MEM_SVM_ATOMICS ); + + // Intel Extensions + + // Unofficial kernel profiling extension: + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_KERNEL_PROFILING_MODES_COUNT_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_KERNEL_PROFILING_MODE_INFO_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_IL_SYMBOLS_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_BINARY_PROGRAM_INTEL ); + + // Unofficial extension (for now) for VTune Debug Info: + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_DEBUG_INFO_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_DEBUG_INFO_SIZES_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_BINARIES_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_BINARY_SIZES_INTEL ); + + // VME and VA + + // clGetDeviceInfo + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_ME_VERSION_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_TRANSFORM_MASK_MAX_WIDTH_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_TRANSFORM_MASK_MAX_HEIGHT_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_TRANSFORM_FILTER_MAX_WIDTH_INTEL ); + ADD_ENUM_NAME( m_cl_int, 
CL_DEVICE_TRANSFORM_FILTER_MAX_HEIGHT_INTEL ); + + // Error Codes + ADD_ENUM_NAME( m_cl_int, CL_INVALID_ACCELERATOR_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_ACCELERATOR_TYPE_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_ACCELERATOR_DESC_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL ); + + // cl_accelerator_type_intel + //CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 + + // cl_accelerator_info_intel + ADD_ENUM_NAME( m_cl_int, CL_ACCELERATOR_DESCRIPTOR_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_ACCELERATOR_REFERENCE_COUNT_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_ACCELERATOR_CONTEXT_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_ACCELERATOR_TYPE_INTEL ); + + // cl_motion_detect_desc_intel flags + //CL_ME_MB_TYPE_16x16_INTEL 0x0 + //CL_ME_MB_TYPE_8x8_INTEL 0x1 + //CL_ME_MB_TYPE_4x4_INTEL 0x2 + + //CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 + //CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 + //CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 + + //CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 + //CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 + + //CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 + //CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 + //CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 + + //CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 + //CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 + + //CL_ME_COST_PENALTY_NONE_INTEL 0x0 + //CL_ME_COST_PENALTY_LOW_INTEL 0x1 + //CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 + //CL_ME_COST_PENALTY_HIGH_INTEL 0x3 + + //CL_ME_COST_PRECISION_QPEL_INTEL 0x0 + //CL_ME_COST_PRECISION_HPEL_INTEL 0x1 + //CL_ME_COST_PRECISION_PEL_INTEL 0x2 + //CL_ME_COST_PRECISION_DPEL_INTEL 0x3 + + //CL_ME_VERSION_LEGACY_INTEL 0x0 + //CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 + + // cl_intel_egl_image_yuv + ADD_ENUM_NAME( m_cl_int, CL_EGL_YUV_PLANE_INTEL ); + + // cl_intel_simultaneous_sharing + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL ); + + // cl_intel_thread_local_exec + ADD_ENUM_NAME( m_cl_command_queue_properties, 
CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL ); + + // cl_intel_va_api_media_sharing + + ADD_ENUM_NAME( m_cl_int, CL_VA_API_DISPLAY_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_PREFERRED_DEVICES_FOR_VA_API_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_ALL_DEVICES_FOR_VA_API_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_VA_API_DISPLAY_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_VA_API_SURFACE_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_VA_API_PLANE_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL ); + + // Error Codes + ADD_ENUM_NAME( m_cl_int, CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_VA_API_MEDIA_SURFACE_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL ); + + // cl_intel_packed_yuv + ADD_ENUM_NAME( m_cl_int, CL_YUYV_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_UYVY_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_YVYU_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VYUY_INTEL ); + + // cl_intel_planar_yuv + + ADD_ENUM_NAME( m_cl_int, CL_NV12_INTEL ); + + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_NO_ACCESS_INTEL ); + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ); + + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL ); + + // cl_intel_required_subgroup_size + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SUB_GROUP_SIZES_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_SPILL_MEM_SIZE_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL ); + + // cl_intel_driver_diagnostics + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL ); + + // cl_intelx_video_enhancement + // This is the base-functionality VEBox extension. + // Note: These are preview enum names and values! 
+ + // cl_device_info + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_VE_VERSION_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_VE_ENGINE_COUNT_INTEL ); + + // cl_queue_properties - TBD: is this a general purpose enum or a bit? + ADD_ENUM_NAME( m_cl_int, CL_QUEUE_VE_ENABLE_INTEL ); + + // attribute_ids for cl_vebox_attrib_desc_intel + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_DENOISE_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_DEINTERLACE_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_HOT_PIXEL_CORR_INTEL ); + + // cl_accelerator_info_intel + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_HISTOGRAMS_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_STATISTICS_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_STMM_INPUT_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_STMM_OUTPUT_INTEL ); + + // cl_intelx_ve_color_pipeline + // Note: These are preview enum names and values! + + // cl_device_info + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_VE_COLOR_PIPE_VERSION_INTEL ); + + // attribute_ids for cl_vebox_attrib_desc_intel + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_STD_STE_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_GAMUT_COMP_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_GECC_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_ACE_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_ACE_ADV_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_TCC_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_PROC_AMP_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_BACK_END_CSC_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_AOI_ALPHA_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_CCM_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_FWD_GAMMA_CORRECT_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_FRONT_END_CSC_INTEL ); + + // cl_intelx_ve_camera_pipeline + // Note, these are preview enum names and values! 
+ + // cl_device_info + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_VE_CAMERA_PIPE_VERSION_INTEL ); + + // attribute_ids for cl_vebox_attrib_desc_intel + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_BLACK_LEVEL_CORR_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_DEMOSAIC_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_WHITE_BALANCE_CORR_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_VE_ACCELERATOR_ATTRIB_VIGNETTE_INTEL ); + + // HEVC PAK + // Note, this extension is still in development! + + // cl_device_info + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PAK_VERSION_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PAK_AVAILABLE_CODECS_INTEL ); + + // cl_queue_properties / cl_command_queue_info + ADD_ENUM_NAME( m_cl_int, CL_QUEUE_PAK_ENABLE_INTEL ); + + // cl_accelerator_info_intel + ADD_ENUM_NAME( m_cl_int, CL_PAK_CTU_COUNT_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_PAK_CTU_WIDTH_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_PAK_CTU_HEIGHT_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_PAK_MAX_INTRA_DEPTH_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_PAK_MAX_INTER_DEPTH_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_PAK_NUM_CUS_PER_CTU_INTEL ); + ADD_ENUM_NAME( m_cl_int, CL_PAK_MV_BUFFER_SIZE_INTEL ); + + // Error Codes + // These are currently all mapped to CL_INVALID_VALUE. + // Need official error code assignment. 
+ //ADD_ENUM_NAME( m_cl_int, CL_INVALID_PAK_CTU_SIZE_INTEL ); + //ADD_ENUM_NAME( m_cl_int, CL_INVALID_PAK_TU_SIZE_INTEL ); + //ADD_ENUM_NAME( m_cl_int, CL_INVALID_PAK_TU_INTRA_DEPTH_INTEL ); + //ADD_ENUM_NAME( m_cl_int, CL_INVALID_PAK_TU_INTER_DEPTH_INTEL ); + //ADD_ENUM_NAME( m_cl_int, CL_INVALID_PAK_BITRATE_RANGE_INTEL ); + //ADD_ENUM_NAME( m_cl_int, CL_INVALID_PAK_INSERTION_INTEL ); + //ADD_ENUM_NAME( m_cl_int, CL_INVALID_PAK_CTU_POSITION_INTEL ); + //ADD_ENUM_NAME( m_cl_int, CL_INVALID_PAK_REFERENCE_IMAGE_INDEX_INTEL ); + + // Altera Extensions: + + // cl_altera_device_temperature + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_CORE_TEMPERATURE_ALTERA ); + + // cl_altera_compiler_mode + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_COMPILER_MODE_ALTERA ); + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_PROGRAM_EXE_LIBRARY_ROOT_ALTERA ); + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_OFFLINE_DEVICE_ALTERA ); + + // These are enums from the Khronos cl_gl.h header file: + + // cl_gl_object_type + ADD_ENUM_NAME( m_cl_int, CL_GL_OBJECT_BUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_GL_OBJECT_TEXTURE2D ); + ADD_ENUM_NAME( m_cl_int, CL_GL_OBJECT_TEXTURE3D ); + ADD_ENUM_NAME( m_cl_int, CL_GL_OBJECT_RENDERBUFFER ); + ADD_ENUM_NAME( m_cl_int, CL_GL_OBJECT_TEXTURE2D_ARRAY ); + ADD_ENUM_NAME( m_cl_int, CL_GL_OBJECT_TEXTURE1D ); + ADD_ENUM_NAME( m_cl_int, CL_GL_OBJECT_TEXTURE1D_ARRAY ); + ADD_ENUM_NAME( m_cl_int, CL_GL_OBJECT_TEXTURE_BUFFER ); + + // cl_gl_texture_info + ADD_ENUM_NAME( m_cl_int, CL_GL_TEXTURE_TARGET ); + ADD_ENUM_NAME( m_cl_int, CL_GL_MIPMAP_LEVEL ); + ADD_ENUM_NAME( m_cl_int, CL_GL_NUM_SAMPLES ); + + // Error Code + ADD_ENUM_NAME( m_cl_int, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR ); + + // cl_gl_context_info + ADD_ENUM_NAME( m_cl_int, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICES_FOR_GL_CONTEXT_KHR ); + + // cl_context_properties + ADD_ENUM_NAME( m_cl_int, CL_GL_CONTEXT_KHR ); + ADD_ENUM_NAME( m_cl_int, CL_EGL_DISPLAY_KHR ); + ADD_ENUM_NAME( m_cl_int, CL_GLX_DISPLAY_KHR 
); + ADD_ENUM_NAME( m_cl_int, CL_WGL_HDC_KHR ); + ADD_ENUM_NAME( m_cl_int, CL_CGL_SHAREGROUP_KHR ); + + // These enums are from the Khronos cl_gl_ext.h header file: + + // cl_khr_gl_event + ADD_ENUM_NAME( m_cl_int, CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR ); + + // These are enums from the Khronos cl_ext.h header file: + + // cl_khr_il_program + // These enums are core in OpenCL 2.1. + //CL_DEVICE_IL_VERSION_KHR 0x105B + //CL_PROGRAM_IL_KHR 0x1169 + + // cl_khr_icd + ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_ICD_SUFFIX_KHR ); + ADD_ENUM_NAME( m_cl_int, CL_PLATFORM_NOT_FOUND_KHR ); + + // cl_khr_initalize_memory + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_MEMORY_INITIALIZE_KHR ); + + // cl_khr_terminate_context + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_TERMINATE_CAPABILITY_KHR ); + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_TERMINATE_KHR ); + + // cl_khr_spir + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SPIR_VERSIONS ); + ADD_ENUM_NAME( m_cl_int, CL_PROGRAM_BINARY_TYPE_INTERMEDIATE ); + + // cl_khr_subgroups + // These enums were promoted to core in OpenCL 2.1. 
+ //CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 + //CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034 + + // cl_nv_device_attribute_query + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_REGISTERS_PER_BLOCK_NV ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_WARP_SIZE_NV ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GPU_OVERLAP_NV ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_INTEGRATED_MEMORY_NV ); + + // cl_ext_atomic_counters + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT ); + + // cl_amd_device_attribute_query + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_TOPOLOGY_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_BOARD_NAME_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SIMD_WIDTH_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_WAVEFRONT_WIDTH_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_LOCAL_MEM_BANKS_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GFXIP_MAJOR_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_GFXIP_MINOR_AMD ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD ); + + // cl_amd_offline_devices + ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_OFFLINE_DEVICES_AMD ); + + // cl_ext_device_fission extension + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_EQUALLY_EXT ); + ADD_ENUM_NAME( 
m_cl_int, CL_DEVICE_PARTITION_BY_COUNTS_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_BY_NAMES_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARENT_DEVICE_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_TYPES_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_AFFINITY_DOMAINS_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_REFERENCE_COUNT_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_STYLE_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PARTITION_FAILED_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_PARTITION_COUNT_EXT ); + ADD_ENUM_NAME( m_cl_int, CL_INVALID_PARTITION_NAME_EXT ); + + // cl_qcom_ext_host_ptr extension + ADD_ENUM_NAME( m_cl_mem_flags, CL_MEM_EXT_HOST_PTR_QCOM ); + + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM ); + ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PAGE_SIZE_QCOM ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_ROW_ALIGNMENT_QCOM ); + ADD_ENUM_NAME( m_cl_int, CL_IMAGE_SLICE_ALIGNMENT_QCOM ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_HOST_UNCACHED_QCOM ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_HOST_WRITEBACK_QCOM ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_HOST_WRITETHROUGH_QCOM ); + ADD_ENUM_NAME( m_cl_int, CL_MEM_HOST_WRITE_COMBINING_QCOM ); + + // cl_qcom_ion_host_ptr extension + ADD_ENUM_NAME( m_cl_int, CL_MEM_ION_HOST_PTR_QCOM ); + + // cl_arm_printf extension + ADD_ENUM_NAME( m_cl_int, CL_PRINTF_CALLBACK_ARM ); + ADD_ENUM_NAME( m_cl_int, CL_PRINTF_BUFFERSIZE_ARM ); + +#if !defined(__ANDROID__) && !defined(__APPLE__) + // gl texture targets + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_BUFFER ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_1D ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_1D_ARRAY ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_2D ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_2D_ARRAY ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_3D ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_CUBE_MAP_POSITIVE_X ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_CUBE_MAP_POSITIVE_Y ); + ADD_ENUM_NAME( m_GLenum, 
GL_TEXTURE_CUBE_MAP_POSITIVE_Z ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_CUBE_MAP_NEGATIVE_X ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z ); + ADD_ENUM_NAME( m_GLenum, GL_TEXTURE_RECTANGLE ); + + // gl texture formats + ADD_ENUM_NAME( m_GLenum, GL_ALPHA ); + ADD_ENUM_NAME( m_GLenum, GL_RGB ); + ADD_ENUM_NAME( m_GLenum, GL_RGBA ); + ADD_ENUM_NAME( m_GLenum, GL_RGBA32F ); + ADD_ENUM_NAME( m_GLenum, GL_RGB32F ); + ADD_ENUM_NAME( m_GLenum, GL_RGBA16F ); + ADD_ENUM_NAME( m_GLenum, GL_RGB16F ); + ADD_ENUM_NAME( m_GLenum, GL_RGBA32UI ); + ADD_ENUM_NAME( m_GLenum, GL_RGB32UI ); + ADD_ENUM_NAME( m_GLenum, GL_RGBA16UI ); + ADD_ENUM_NAME( m_GLenum, GL_RGB16UI ); + ADD_ENUM_NAME( m_GLenum, GL_RGBA8UI ); + ADD_ENUM_NAME( m_GLenum, GL_RGB8UI ); + ADD_ENUM_NAME( m_GLenum, GL_RGBA32I ); + ADD_ENUM_NAME( m_GLenum, GL_RGB32I ); + ADD_ENUM_NAME( m_GLenum, GL_RGBA16I ); + ADD_ENUM_NAME( m_GLenum, GL_RGB16I ); + ADD_ENUM_NAME( m_GLenum, GL_RGBA8I ); + ADD_ENUM_NAME( m_GLenum, GL_RGB8I ); + ADD_ENUM_NAME( m_GLenum, GL_RG ); + ADD_ENUM_NAME( m_GLenum, GL_R8 ); + ADD_ENUM_NAME( m_GLenum, GL_R16 ); + ADD_ENUM_NAME( m_GLenum, GL_RG8 ); + ADD_ENUM_NAME( m_GLenum, GL_RG16 ); + ADD_ENUM_NAME( m_GLenum, GL_R16F ); + ADD_ENUM_NAME( m_GLenum, GL_R32F ); + ADD_ENUM_NAME( m_GLenum, GL_RG16F ); + ADD_ENUM_NAME( m_GLenum, GL_RG32F ); + ADD_ENUM_NAME( m_GLenum, GL_R8I ); + ADD_ENUM_NAME( m_GLenum, GL_R8UI ); + ADD_ENUM_NAME( m_GLenum, GL_R16I ); + ADD_ENUM_NAME( m_GLenum, GL_R16UI ); + ADD_ENUM_NAME( m_GLenum, GL_R32I ); + ADD_ENUM_NAME( m_GLenum, GL_R32UI ); + ADD_ENUM_NAME( m_GLenum, GL_RG8I ); + ADD_ENUM_NAME( m_GLenum, GL_RG8UI ); + ADD_ENUM_NAME( m_GLenum, GL_RG16I ); + ADD_ENUM_NAME( m_GLenum, GL_RG16UI ); + ADD_ENUM_NAME( m_GLenum, GL_RG32I ); + ADD_ENUM_NAME( m_GLenum, GL_RG32UI ); +#endif +} diff --git a/Src/enummap.h b/Src/enummap.h new file mode 100644 index 00000000..bd2af913 --- /dev/null +++ b/Src/enummap.h @@ 
-0,0 +1,129 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#pragma once + +#include "common.h" +#include "cli_ext.h" + +#include +#include + +class CEnumNameMap +{ +public: + CEnumNameMap(); + + #define GENERATE_MAP_AND_FUNC( _name, _type ) \ + private: \ + std::map< _type, std::string > m_##_type; \ + public: \ + std::string _name( _type e ) const \ + { \ + std::map< _type, std::string >::const_iterator i = \ + m_##_type.find( e ); \ + if( i == m_##_type.end() ) \ + { \ + return "**UNKNOWN ENUM**"; \ + } \ + else \ + { \ + return (*i).second; \ + } \ + } + + #define GENERATE_MAP_AND_BITFIELD_FUNC( _name, _type ) \ + private: \ + std::map< _type, std::string > m_##_type; \ + public: \ + std::string _name( _type e ) const \ + { \ + std::string ret = ""; \ + int bit = 0; \ + std::map< _type, std::string >::const_iterator i = \ + m_##_type.find( e ); \ + if( i != m_##_type.end() ) \ + { \ + ret += (*i).second; \ + } \ + else \ + { \ + while( e != 0 ) \ + { \ + _type check = (_type)1 << bit; \ + if( e & check ) \ + { \ + i = m_##_type.find( check ); \ + if( ret.length() ) \ + { \ + ret += " | "; \ + } \ + if( i != m_##_type.end() ) \ + { \ + ret += (*i).second; \ + } \ + else \ + { \ + ret += ""; \ + } \ + e &= ~check; \ + } \ + ++bit; \ + } \ + } \ + return ret; \ + } + + // This type doesn't exist in CL.h, but the enums conflict with + // other regular old cl_int enums. + typedef cl_int cl_command_exec_status; + + // CL bitfield values and plain uints may collide and need their own map. + // GL enums need their own map. + // CL enums that are allocated from the Khronos registry are unique and + // can go into the main/default cl_int map. 
+ GENERATE_MAP_AND_FUNC( name, cl_int ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_bool, cl_bool ); + GENERATE_MAP_AND_FUNC( name_build_status, cl_build_status ); + GENERATE_MAP_AND_FUNC( name_command_exec_status, cl_command_exec_status ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_command_queue_properties, cl_command_queue_properties ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_device_affinity_domain, cl_device_affinity_domain ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_device_exec_capabilities, cl_device_exec_capabilities ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_device_fp_config, cl_device_fp_config ); + GENERATE_MAP_AND_FUNC( name_device_local_mem_type, cl_device_local_mem_type ); + GENERATE_MAP_AND_FUNC( name_device_mem_cache_type, cl_device_mem_cache_type ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_device_svm_capabilities, cl_device_svm_capabilities ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_device_type, cl_device_type ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_kernel_arg_type_qualifier, cl_kernel_arg_type_qualifier ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_map_flags, cl_map_flags ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_mem_flags, cl_mem_flags ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_mem_migration_flags, cl_mem_migration_flags ); + GENERATE_MAP_AND_FUNC( name_program_binary_type, cl_program_binary_type ); + GENERATE_MAP_AND_BITFIELD_FUNC( name_svm_mem_flags, cl_svm_mem_flags ); + GENERATE_MAP_AND_FUNC( name_gl, GLenum ); + + #undef GENERATE_MAP_AND_FUNC + #undef GENERATE_MAP_AND_BITFIELD_FUNC + +private: + DISALLOW_COPY_AND_ASSIGN( CEnumNameMap ); +}; diff --git a/Src/git_version.cpp.in b/Src/git_version.cpp.in new file mode 100644 index 00000000..02b06286 --- /dev/null +++ b/Src/git_version.cpp.in @@ -0,0 +1,27 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, 
including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#include "intercept.h" + +const char* CLIntercept::sc_GitDescribe = "@GIT_DESCRIBE@"; +const char* CLIntercept::sc_GitRefSpec = "@GIT_REFSPEC@"; +const char* CLIntercept::sc_GitHash = "@GIT_SHA1@"; \ No newline at end of file diff --git a/Src/git_version.rc.in b/Src/git_version.rc.in new file mode 100644 index 00000000..71bece49 --- /dev/null +++ b/Src/git_version.rc.in @@ -0,0 +1,64 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +// Note: git_version.rc2 is generated from git_version.rc.in. + +///////////////////////////////////////////////////////////////////////////// +// English (United States) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 2,2,0,0 + PRODUCTVERSION 2,2,0,0 + FILEFLAGSMASK 0x0L +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x0L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "CompanyName", "Intel(R) Corporation" + VALUE "FileDescription", "Intercept Layer for OpenCL(tm) Applications" + VALUE "FileVersion", "@GIT_DESCRIBE@" + VALUE "InternalName", "CLIntercept" + VALUE "LegalCopyright", "Copyright(C) Intel Corporation 2018" + VALUE "OriginalFilename", "OpenCL.dll" + VALUE "ProductName", "Intercept Layer for OpenCL(tm) Applications" + VALUE "ProductVersion", "@GIT_DESCRIBE@" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + +#endif // English (United States) resources +///////////////////////////////////////////////////////////////////////////// diff --git a/Src/instrumentation.h b/Src/instrumentation.h new file mode 100644 index 00000000..95bf8286 --- /dev/null +++ b/Src/instrumentation.h @@ -0,0 +1,175 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files 
#pragma once

#if defined(USE_ITT)

// NOTE(review): both include targets below were missing from the text this
// was taken from.  <string> is needed for std::string and <ittnotify.h> for
// the __itt_* API used in this header - confirm against upstream.
#include <string>

#define INTEL_ITTNOTIFY_API_PRIVATE
#include <ittnotify.h>

/*****************************************************************************\
TASK METADATA:
\*****************************************************************************/

// Maps a C++ value type to the matching __itt_metadata_type enumerant.
// Types without a specialization map to __itt_metadata_unknown.
//
// NOTE(review): the template parameter list and the specialization
// arguments were missing from the text this was taken from.  They are
// reconstructed here from the ITT metadata type each specialization maps
// to - confirm against upstream.
template<class T>
struct MapToITTType             { enum { value = __itt_metadata_unknown }; };

template<>
struct MapToITTType<cl_ulong>   { enum { value = __itt_metadata_u64     }; };
template<>
struct MapToITTType<cl_long>    { enum { value = __itt_metadata_s64     }; };
template<>
struct MapToITTType<cl_uint>    { enum { value = __itt_metadata_u32     }; };
template<>
struct MapToITTType<cl_int>     { enum { value = __itt_metadata_s32     }; };
template<>
struct MapToITTType<cl_ushort>  { enum { value = __itt_metadata_u16     }; };
template<>
struct MapToITTType<cl_short>   { enum { value = __itt_metadata_s16     }; };
template<>
struct MapToITTType<cl_float>   { enum { value = __itt_metadata_float   }; };
template<>
struct MapToITTType<cl_double>  { enum { value = __itt_metadata_double  }; };

// Pointers are recorded as unsigned integers of pointer width.
// TODO: Is there a standard preprocessor define that can tell us pointer size?
#if defined(_WIN64) || defined(__LP64__)

CLI_C_ASSERT( sizeof(void*) == 8 );
template<class T>
struct MapToITTType<T*>         { enum { value = __itt_metadata_u64     }; };

#else

CLI_C_ASSERT( sizeof(void*) == 4 );
template<class T>
struct MapToITTType<T*>         { enum { value = __itt_metadata_u32     }; };

#endif

// Attaches a single value to the current ITT task as metadata.
//
//  domain  ITT domain the task belongs to.
//  name    Metadata key; an ITT string handle is created from it.
//  value   Value to record; its ITT type comes from MapToITTType<T>.
template<class T>
inline void add_task_metadata(
    __itt_domain* domain,
    const std::string& name,
    const T value )
{
    __itt_string_handle* itt_string_handle = __itt_string_handle_create(name.c_str());
    __itt_metadata_type metadataType = (__itt_metadata_type)MapToITTType<T>::value;

    __itt_metadata_add_with_scope(domain, __itt_scope_task, itt_string_handle, metadataType, 1, (void*)&value);
}

// Image formats are recorded field by field, as "<name>.<field>", rather
// than as a raw pointer value.  A NULL format records nothing.
template<>
inline void add_task_metadata<const cl_image_format*>(
    __itt_domain* domain,
    const std::string& name,
    const cl_image_format* value )
{
    if( value )
    {
        std::string fieldName;

        fieldName = name + ".image_channel_data_type";
        add_task_metadata(domain, fieldName, value->image_channel_data_type);

        fieldName = name + ".image_channel_order";
        add_task_metadata(domain, fieldName, value->image_channel_order);
    }
}

// Attaches an array of `count` values to the current ITT task as metadata.
// A NULL `values` pointer records nothing.
template<class T>
inline void add_task_metadata_array(
    __itt_domain* domain,
    const std::string& name,
    const size_t count,
    const T* values )
{
    if( values )
    {
        __itt_string_handle* itt_string_handle = __itt_string_handle_create(name.c_str());
        __itt_metadata_type metadataType = (__itt_metadata_type)MapToITTType<T>::value;

        __itt_metadata_add_with_scope(domain, __itt_scope_task, itt_string_handle, metadataType, count, (void*)values);
    }
}

// The macros below expect a variable named pIntercept to be in scope at the
// point of use.  Each one does nothing unless the corresponding control
// (ITTCallLogging or ITTPerformanceTiming) is enabled, and calls ittInit()
// before touching any other ITT state.

#define ITT_CALL_LOGGING_ENTER(_kernel)                                     \
    if( pIntercept->config().ITTCallLogging )                               \
    {                                                                       \
        pIntercept->ittInit();                                              \
        pIntercept->ittCallLoggingEnter( __FUNCTION__, _kernel );           \
    }

#define ITT_CALL_LOGGING_EXIT()                                             \
    if( pIntercept->config().ITTCallLogging )                               \
    {                                                                       \
        pIntercept->ittInit();                                              \
        pIntercept->ittCallLoggingExit();                                   \
    }

#define ITT_ADD_PARAM_AS_METADATA(_param)                                   \
    if( pIntercept->config().ITTCallLogging )                               \
    {                                                                       \
        pIntercept->ittInit();                                              \
        __itt_domain* itt_domain = pIntercept->ittDomain();                 \
        add_task_metadata( itt_domain, #_param, _param );                   \
    }

#define ITT_ADD_ARRAY_PARAM_AS_METADATA(_count, _param)                     \
    if( pIntercept->config().ITTCallLogging )                               \
    {                                                                       \
        pIntercept->ittInit();                                              \
        __itt_domain* itt_domain = pIntercept->ittDomain();                 \
        add_task_metadata_array( itt_domain, #_param, _count, _param );     \
    }

#define ITT_REGISTER_COMMAND_QUEUE(_queue, _perfCounters)                   \
    if( pIntercept->config().ITTPerformanceTiming )                         \
    {                                                                       \
        pIntercept->ittInit();                                              \
        pIntercept->ittRegisterCommandQueue( _queue, _perfCounters );       \
    }

#define ITT_RELEASE_COMMAND_QUEUE(_queue)                                   \
    if( pIntercept->config().ITTPerformanceTiming )                         \
    {                                                                       \
        pIntercept->ittInit();                                              \
        pIntercept->ittReleaseCommandQueue( _queue );                       \
    }

#else

// Without ITT support every ITT macro expands to nothing.
#define ITT_CALL_LOGGING_ENTER(_kernel)
#define ITT_CALL_LOGGING_EXIT()
#define ITT_ADD_PARAM_AS_METADATA(_param)
#define ITT_ADD_ARRAY_PARAM_AS_METADATA(_count, _param)
#define ITT_REGISTER_COMMAND_QUEUE(_queue, _perfCounters)
#define ITT_RELEASE_COMMAND_QUEUE(_queue)

#endif

// Chrome tracing does not depend on ITT, so this macro is always available.
#define CHROME_REGISTER_COMMAND_QUEUE(_queue)                               \
    if( pIntercept->config().ChromePerformanceTiming )                      \
    {                                                                       \
        pIntercept->chromeRegisterCommandQueue( _queue );                   \
    }
above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include // strdate + +#include "common.h" +#include "intercept.h" + +/*****************************************************************************\ + +Inline Function: + Hash + +Description: + Calculates hash from sequence of 32-bit values. + + Jenkins 96-bit mixing function with 32-bit feedback-loop and 64-bit state. + + All magic values are DWORDs of SHA2-256 mixing data: + 0x428a2f98 0x71374491 0xb5c0fbcf 0xe9b5dba5 + 0x3956c25b 0x59f111f1 0x923f82a4 0xab1c5ed5 + + From: http://www.burtleburtle.net/bob/c/lookup2.c + + lookup2.c, by Bob Jenkins, December 1996, Public Domain. + hash(), hash2(), hash3, and mix() are externally useful functions. + Routines to test the hash are included if SELF_TEST is defined. + You can use this free for any purpose. It has no warranty. 
/*****************************************************************************\

Inline Function:
    HashJenkinsMixRound

Description:
    One round of the Jenkins lookup2 mixing step, applied in place to the
    three 32-bit words pointed to by pA, pB and pC.

\*****************************************************************************/
static inline void HashJenkinsMixRound(
    unsigned int* pA,
    unsigned int* pB,
    unsigned int* pC )
{
    unsigned int a = *pA;
    unsigned int b = *pB;
    unsigned int c = *pC;

    a -= b;  a -= c;  a ^= ( c >> 13 );
    b -= c;  b -= a;  b ^= ( a << 8 );
    c -= a;  c -= b;  c ^= ( b >> 13 );
    a -= b;  a -= c;  a ^= ( c >> 12 );
    b -= c;  b -= a;  b ^= ( a << 16 );
    c -= a;  c -= b;  c ^= ( b >> 5 );
    a -= b;  a -= c;  a ^= ( c >> 3 );
    b -= c;  b -= a;  b ^= ( a << 10 );
    c -= a;  c -= b;  c ^= ( b >> 15 );

    *pA = a;
    *pB = b;
    *pC = c;
}

/*****************************************************************************\

Inline Function:
    Hash

Description:
    Folds `count` 32-bit words starting at `data` into a 64-bit hash.

    Each input word is XOR-ed into a 32-bit feedback word, then the feedback
    word and the two 32-bit state words go through one mixing round.  The
    result is the two state words, high word first.  With count == 0 no
    input is read and the result is the initial state.

\*****************************************************************************/
static inline uint64_t Hash(
    const unsigned int *data,
    size_t count )
{
    unsigned int    feedback = 0x428a2f98;
    unsigned int    stateHi = 0x71374491;
    unsigned int    stateLo = 0xb5c0fbcf;

    for( size_t index = 0; index < count; ++index )
    {
        feedback ^= data[ index ];
        HashJenkinsMixRound( &feedback, &stateHi, &stateLo );
    }

    return ( ( (uint64_t)stateHi ) << 32 ) | stateLo;
}
m_EnqueueCounter = 1; + m_StartTime = 0; + + m_ProgramNumber = 0; + + m_MemAllocNumber = 0; + + m_AubCaptureStarted = false; + m_AubCaptureKernelEnqueueSkipCounter = 0; + m_AubCaptureKernelEnqueueCaptureCounter = 0; + +#define CLI_CONTROL( _type, _name, _init, _desc ) m_Config . _name = _init; +#include "controls.h" +#undef CLI_CONTROL + +#if defined(USE_ITT) + m_ITTInitialized = false; + + m_ITTDomain = NULL; + + //m_ITTQueuedState = NULL; + //m_ITTSubmittedState = NULL; + //m_ITTExecutingState = NULL; + + //m_ITTQueueTrackGroup = NULL; +#endif +} + +/////////////////////////////////////////////////////////////////////////////// +// +CLIntercept::~CLIntercept() +{ + stopAubCapture( NULL ); + report(); + + m_OS.EnterCriticalSection(); + + log( "CLIntercept is shutting down...\n" ); + + // Set the dispatch to the dummy dispatch. The destructor is called + // as the process is terminating. We don't know when each DLL gets + // unloaded, so it's not safe to call into any OpenCL functions in + // our destructor. Setting to the dummy dispatch ensures that no + // OpenCL functions get called. Note that this means we do potentially + // leave some events, kernels, or programs un-released, but since + // the process is terminating, that's probably OK. 
+ m_Dispatch = dummyDispatch; + + if( m_OpenCLLibraryHandle != NULL ) + { + OS().UnloadLibrary( m_OpenCLLibraryHandle ); + } + + { + CCpuTimingStatsMap::iterator i = m_CpuTimingStatsMap.begin(); + while( i != m_CpuTimingStatsMap.end() ) + { + SCpuTimingStats* pCpuTimingStats = (*i).second; + + if( pCpuTimingStats ) + { + delete pCpuTimingStats; + } + + (*i).second = NULL; + ++i; + } + } + + { + CDeviceTimingStatsMap::iterator i = m_DeviceTimingStatsMap.begin(); + while( i != m_DeviceTimingStatsMap.end() ) + { + SDeviceTimingStats* pDeviceTimingStats = (*i).second; + + if( pDeviceTimingStats ) + { + delete pDeviceTimingStats; + } + + (*i).second = NULL; + ++i; + } + } + + { + CEventList::iterator i = m_EventList.begin(); + while( i != m_EventList.end() ) + { + SEventListNode* pEventListNode = (*i); + + if( pEventListNode ) + { + // If we were able to release events, we'd release + // pEventListNode->Event here. + + delete pEventListNode; + } + + (*i) = NULL; + ++i; + } + } + + { + CContextCallbackInfoMap::iterator i = m_ContextCallbackInfoMap.begin(); + while( i != m_ContextCallbackInfoMap.end() ) + { + SContextCallbackInfo* pContextCallbackInfo = (*i).second; + + if( pContextCallbackInfo ) + { + delete pContextCallbackInfo; + } + + (*i).second = NULL; + ++i; + } + } + + { + CPrecompiledKernelOverridesMap::iterator i = m_PrecompiledKernelOverridesMap.begin(); + while( i != m_PrecompiledKernelOverridesMap.end() ) + { + SPrecompiledKernelOverrides* pOverrides = (*i).second; + + if( pOverrides ) + { + // If we were able to release kernels or programs, we'd release + // the override kernels and program here. 
+ + delete pOverrides; + } + + (*i).second = NULL; + ++i; + } + } + + { + CBuiltinKernelOverridesMap::iterator i = m_BuiltinKernelOverridesMap.begin(); + while( i != m_BuiltinKernelOverridesMap.end() ) + { + SBuiltinKernelOverrides* pOverrides = (*i).second; + + if( pOverrides ) + { + // If we were able to release kernels or programs, we'd release + // the override kernels and program here. + + delete pOverrides; + } + + (*i).second = NULL; + ++i; + } + } + + log( "... shutdown complete.\n" ); + + m_InterceptLog.close(); + m_InterceptTrace.close(); + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +template +static bool ReadRegistry( + const OS::Services& OS, + const char* name, + T& value ) +{ + unsigned int readValue = 0; + bool success = OS.ReadRegistry( name, &readValue, sizeof(readValue) ); + if( success ) + { + value = readValue; + } + + return success; +} +template <> +bool ReadRegistry( + const OS::Services& OS, + const char* name, + bool& value ) +{ + unsigned int readValue = 0; + bool success = OS.ReadRegistry( name, &readValue, sizeof(readValue) ); + if( success ) + { + value = ( readValue != 0 ); + } + + return success; +} +template <> +bool ReadRegistry( + const OS::Services& OS, + const char* name, + std::string& value ) +{ + char readValue[256] = ""; + bool success = OS.ReadRegistry( name, readValue, sizeof(readValue) ); + if( success ) + { + value = readValue; + } + + return success; +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::init() +{ + if( m_OS.Init() == false ) + { +#ifdef __ANDROID__ + __android_log_print(ANDROID_LOG_INFO, "clIntercept", "OS.Init FAILED!\n" ); +#endif + return false; + } + + m_OS.EnterCriticalSection(); + +#if defined(_WIN32) + OS::Services_Common::ENV_PREFIX = "CLI_"; + OS::Services_Common::REGISTRY_KEY = "SOFTWARE\\INTEL\\IGFX\\CLINTERCEPT"; +#elif defined(__linux__) + 
OS::Services_Common::ENV_PREFIX = "CLI_"; + OS::Services_Common::CONFIG_FILE = "clintercept.conf"; +#endif + + bool breakOnLoad = false; + ReadRegistry( m_OS, "BreakOnLoad", breakOnLoad ); + + if( breakOnLoad ) + { + CLI_DEBUG_BREAK(); + } + + std::string dllName = ""; + ReadRegistry( m_OS, "DllName", dllName ); + + ReadRegistry( m_OS, "SimpleDumpProgram", m_Config.SimpleDumpProgramSource ); // backwards compatible, replaced by SimpleDumpProgramSource + ReadRegistry( m_OS, "DumpProgramsScript", m_Config.DumpProgramSourceScript ); // backwards compatible, replaced by DumpProgramSourceScript + ReadRegistry( m_OS, "DumpProgramsInject", m_Config.DumpProgramSource ); // backwards compatible, replaced by DumpProgramSource + ReadRegistry( m_OS, "InjectPrograms", m_Config.InjectProgramSource ); // backwards compatible, replaced by InjectProgramSource + +#define CLI_CONTROL( _type, _name, _init, _desc ) ReadRegistry( m_OS, #_name, m_Config . _name ); +#include "controls.h" +#undef CLI_CONTROL + + if( m_Config.LogToFile ) + { + std::string fileName = ""; + +#if defined(_WIN32) || defined(__linux__) + if( !m_Config.LogDir.empty() ) + { + std::replace( m_Config.LogDir.begin(), m_Config.LogDir.end(), '\\', '/' ); + OS::Services_Common::LOG_DIR = m_Config.LogDir.c_str(); + } +#endif + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/"; + fileName += sc_LogFileName; + + OS().MakeDumpDirectories( fileName ); + + if( m_Config.AppendFiles ) + { + m_InterceptLog.open( fileName.c_str(), std::ios::out | std::ios::app ); + } + else + { + m_InterceptLog.open( fileName.c_str(), std::ios::out ); + } + } + + if( m_Config.ChromeCallLogging || + m_Config.ChromePerformanceTiming ) + { + std::string fileName = ""; + + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/"; + fileName += sc_TraceFileName; + + OS().MakeDumpDirectories( fileName ); + m_InterceptTrace.open( fileName.c_str(), std::ios::out ); + m_InterceptTrace << "[\n"; + + 
uint64_t processId = OS().GetProcessID(); + uint64_t threadId = OS().GetThreadID(); + std::string processName = OS().GetProcessName(); + m_InterceptTrace + << "{\"ph\":\"M\", \"name\":\"process_name\", \"pid\":" << processId + << ", \"tid\":" << threadId + << ", \"args\":{\"name\":\"" << processName + << "\"}},\n"; + //m_InterceptTrace + // << "{\"ph\":\"M\", \"name\":\"thread_name\", \"pid\":" << processId + // << ", \"tid\":" << threadId + // << ", \"args\":{\"name\":\"Host APIs\"}},\n"; + } + + std::string name = ""; + OS().GetCLInterceptName( name ); + + std::string bits = + ( sizeof(void*) == 8 ) ? "64-bit" : + ( sizeof(void*) == 4 ) ? "32-bit" : + "XX-bit"; + + log( "-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n" ); + log( "CLIntercept (" + bits + ") is loading...\n" ); + log( "CLintercept file location: " + name + "\n" ); + log( "CLIntercept URL: " + std::string(sc_URL) + "\n" ); +#if defined(CLINTERCEPT_CMAKE) + log( "CLIntercept git description: " + std::string(sc_GitDescribe) + "\n" ); + log( "CLIntercept git refspec: " + std::string(sc_GitRefSpec) + "\n" ); + log( "CLInterecpt git hash: " + std::string(sc_GitHash) + "\n" ); +#endif +#if defined(_WIN32) + log( "CLIntercept environment variable prefix: " + std::string( OS::Services_Common::ENV_PREFIX ) + "\n" ); + log( "CLIntercept registry key: " + std::string( OS::Services_Common::REGISTRY_KEY ) + "\n" ); +#elif defined(__linux__) + log( "CLIntercept environment variable prefix: " + std::string( OS::Services_Common::ENV_PREFIX ) + "\n" ); + log( "CLIntercept config file: " + std::string( OS::Services_Common::CONFIG_FILE ) + "\n" ); +#endif + + // Windows and Linux load the real OpenCL library and retrieve + // the OpenCL entry points from the real library dynamically. 
+#if defined(_WIN32) || defined(__linux__) + if( dllName != "" ) + { + log( "Read DLL name from user parameters: " + dllName + "\n" ); + log( "Trying to load dispatch from: " + dllName + "\n" ); + + if( initDispatch( dllName ) ) + { + log( "... success!\n" ); + } + } + else + { +#if defined(_WIN32) + + char* windir = NULL; + size_t length = 0; + + _dupenv_s( &windir, &length, "windir" ); + + // Try some common DLL names. + const std::string dllNames[] = + { + "real_opencl.dll", + #if defined(WIN32) + std::string(windir) + "/syswow64/opencl.dll", + #endif + std::string(windir) + "/system32/opencl.dll", + }; + + free( windir ); + +#elif defined(__ANDROID__) + + const std::string dllNames[] = + { + "/system/vendor/lib/real_libOpenCL.so", + "real_libOpenCL.so", + }; + +#elif defined(__linux__) + + const std::string dllNames[] = + { + "./real_libOpenCL.so", + }; + +#else +#error Unknown OS! +#endif + + const int numNames = sizeof(dllNames) / sizeof(dllNames[0]); + int i = 0; + + for( i = 0; i < numNames; i++ ) + { + log( "Trying to load dispatch from: " + dllNames[i] + "\n" ); + + if( initDispatch( dllNames[i] ) ) + { + log( "... success!\n" ); + break; + } + } + } +#elif defined(__APPLE__) + if( initDispatch() ) + { + log( "Dispatch table initialized.\n" ); + } +#else +#error Unknown OS! +#endif + +#define CLI_CONTROL( _type, _name, _init, _desc ) if ( m_Config . _name != _init ) { log( #_name " is set to a non-default value!\n" ); } +#include "controls.h" +#undef CLI_CONTROL + + m_StartTime = m_OS.GetTimer(); + log( "Timer Started!\n" ); + + log( "... 
loading complete.\n" ); + + m_OS.LeaveCriticalSection(); + + return true; +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Writes the end-of-run report file while holding the OS critical section. On Windows with DumpProgramSourceScript set, the path is CLShaderDump_<date>/<executable name without its last four characters>_<time>.log; otherwise it is sc_ReportFileName inside the dump directory. The report lists the enqueue-mode warnings, the total enqueue count, and the host and device timing tables when those controls are enabled. */ void CLIntercept::report() +{ + m_OS.EnterCriticalSection(); + + char filepath[MAX_PATH] = ""; + +#if defined(_WIN32) + if( config().DumpProgramSourceScript ) + { + char dirname[MAX_PATH] = ""; + char filename[MAX_PATH] = ""; + + size_t remaining = MAX_PATH; + + char date[9] = ""; + char time[9] = ""; + char* curPos = NULL; + char* nextToken = NULL; + char* pch = NULL; + + // Directory: + + curPos = dirname; + remaining = MAX_PATH; + memset( curPos, 0, MAX_PATH ); + + _strdate_s( date, 9 ); + _strtime_s( time, 9 ); + + memcpy_s( curPos, remaining, "CLShaderDump_", 14 ); + curPos += 13; + remaining -= 13; + + memcpy_s( curPos, remaining, strtok_s( date, "/", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, "/", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, "/", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + ::CreateDirectoryA( dirname, NULL ); + + // File: + + curPos = filename; + remaining = MAX_PATH; + memset( curPos, 0, MAX_PATH ); + + if( GetModuleFileNameA( NULL, filename, MAX_PATH-1 ) == 0 ) + { + CLI_ASSERT( 0 ); + strcpy_s( curPos, remaining, "process.exe" ); + } + + /* Fall back to the whole buffer when there is no path separator (always the case for the process.exe fallback above) instead of incrementing a NULL pointer. 
*/ + pch = strrchr( filename, '\\' ); + pch = ( pch != NULL ) ? pch + 1 : filename; + + /* Measure the name once, before it is moved: source and destination share the filename buffer, so the copy has to tolerate overlap and the bytes at pch may differ afterwards. The stem drops the last four characters to cut off the .exe suffix. */ + const size_t nameLength = strlen( pch ); + const size_t stemLength = ( nameLength > 4 ) ? nameLength - 4 : nameLength; + memmove_s( curPos, remaining, pch, nameLength ); + curPos += stemLength; + remaining -= stemLength; + + memcpy_s( curPos, remaining, "_", 2 ); + curPos += 1; + remaining -= 1; + + memcpy_s( curPos, remaining, strtok_s( time, ":", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, ":", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, ":", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + CLI_SPRINTF( curPos, remaining, "" ); +
curPos += 1; + remaining -= 1; + + CLI_SPRINTF( filepath, MAX_PATH, "%s/%s.%s", dirname, filename, "log" ); + } + else +#endif + { + std::string fileName = ""; + + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/"; + fileName += sc_ReportFileName; + + OS().MakeDumpDirectories( fileName ); + + CLI_SPRINTF( filepath, MAX_PATH, "%s", fileName.c_str() ); + } + + // Report + + std::ofstream os; + if( m_Config.AppendFiles ) + { + os.open( filepath, std::ios::out | std::ios::binary | std::ios::app ); + } + else + { + os.open( filepath, std::ios::out | std::ios::binary ); + } + if( os.good() ) + { + if( config().FinishAfterEnqueue ) + { + os << "*** WARNING *** FinishAfterEnqueue Enabled!" << std::endl << std::endl; + } + if( config().FlushAfterEnqueue ) + { + os << "*** WARNING *** FlushAfterEnqueue Enabled!" << std::endl << std::endl; + } + if( config().NullEnqueue ) + { + os << "*** WARNING *** NullEnqueue Enabled!" << std::endl << std::endl; + } + + os << "Total Enqueues: " << m_EnqueueCounter - 1 << std::endl << std::endl; + + if( config().HostPerformanceTiming ) + { + os << std::endl << "Host Performance Timing Results:" << std::endl; + + os << std::endl + << std::right << std::setw(44) << "Function Name" << ", " + << std::right << std::setw( 6) << "Calls" << ", " + << std::right << std::setw(13) << "Average (ns)" << ", " + << std::right << std::setw(13) << "Min (ns)" << ", " + << std::right << std::setw(13) << "Max (ns)" << std::endl; + + uint64_t overallTotalTicks = 0; + CCpuTimingStatsMap::iterator i = m_CpuTimingStatsMap.begin(); + while( i != m_CpuTimingStatsMap.end() ) + { + SCpuTimingStats* pCpuTimingStats = (*i).second; + const std::string& name = (*i).first; + + if( !name.empty() && pCpuTimingStats ) + { + os << std::right << std::setw(44) << name << ", " + << std::right << std::setw( 6) << pCpuTimingStats->NumberOfCalls << ", " + << std::right << std::setw(13) << OS().TickToNS( pCpuTimingStats->TotalTicks ) / 
pCpuTimingStats->NumberOfCalls << ", " + << std::right << std::setw(13) << OS().TickToNS( pCpuTimingStats->MinTicks ) << ", " + << std::right << std::setw(13) << OS().TickToNS( pCpuTimingStats->MaxTicks ) << std::endl; + + overallTotalTicks += pCpuTimingStats->TotalTicks; + } + + ++i; + } + + os << std::endl + << std::right << std::setw(44) << "Function Name" << ", " + << std::right << std::setw( 6) << "Calls" << ", " + << std::right << std::setw(13) << "Ticks" << ", " + << std::right << std::setw(13) << "Min Ticks" << ", " + << std::right << std::setw(13) << "Max Ticks" << ", " + << std::right << std::setw(13) << "% Ticks" << std::endl; + + i = m_CpuTimingStatsMap.begin(); + while( i != m_CpuTimingStatsMap.end() ) + { + SCpuTimingStats* pCpuTimingStats = (*i).second; + const std::string& name = (*i).first; + + if( !name.empty() && pCpuTimingStats ) + { + os << std::right << std::setw(44) << name << ", " + << std::right << std::setw( 6) << pCpuTimingStats->NumberOfCalls << ", " + << std::right << std::setw(13) << pCpuTimingStats->TotalTicks << ", " + << std::right << std::setw(13) << pCpuTimingStats->MinTicks << ", " + << std::right << std::setw(13) << pCpuTimingStats->MaxTicks << ", " + << std::right << std::setw(13) + << std::fixed << std::setprecision(2) + << ( pCpuTimingStats->TotalTicks * 100.0 ) / ( overallTotalTicks ) << std::endl; + } + + ++i; + } + } + + if( config().DevicePerformanceTiming ) + { + os << std::endl << "Device Performance Timing Results:" << std::endl; + + cl_ulong totalTotalNS = 0; + size_t longestName = 32; + + CDeviceTimingStatsMap::iterator i = m_DeviceTimingStatsMap.begin(); + while( i != m_DeviceTimingStatsMap.end() ) + { + const std::string& name = (*i).first; + SDeviceTimingStats* pDeviceTimingStats = (*i).second; + + if( !name.empty() && pDeviceTimingStats ) + { + totalTotalNS += pDeviceTimingStats->TotalNS; + longestName = std::max< size_t >( name.length(), longestName ); + } + + ++i; + } + + os << std::endl << "Total Time (ns): " 
<< totalTotalNS << std::endl; + + os << std::endl + << std::right << std::setw(longestName) << "Function Name" << ", " + << std::right << std::setw( 6) << "Calls" << ", " + << std::right << std::setw(13) << "Time (ns)" << ", " + << std::right << std::setw( 8) << "Time (%)" << ", " + << std::right << std::setw(13) << "Average (ns)" << ", " + << std::right << std::setw(13) << "Min (ns)" << ", " + << std::right << std::setw(13) << "Max (ns)" << std::endl; + + i = m_DeviceTimingStatsMap.begin(); + while( i != m_DeviceTimingStatsMap.end() ) + { + const std::string& name = (*i).first; + SDeviceTimingStats* pDeviceTimingStats = (*i).second; + + if( !name.empty() && pDeviceTimingStats ) + { + os << std::right << std::setw(longestName) << name << ", " + << std::right << std::setw( 6) << pDeviceTimingStats->NumberOfCalls << ", " + << std::right << std::setw(13) << pDeviceTimingStats->TotalNS << ", " + << std::right << std::setw( 7) << std::fixed << std::setprecision(2) << pDeviceTimingStats->TotalNS * 100.0f / totalTotalNS << "%, " + << std::right << std::setw(13) << pDeviceTimingStats->TotalNS / pDeviceTimingStats->NumberOfCalls << ", " + << std::right << std::setw(13) << pDeviceTimingStats->MinNS << ", " + << std::right << std::setw(13) << pDeviceTimingStats->MaxNS << std::endl; + } + + ++i; + } + } + +#if defined(USE_MDAPI) + if( !config().DevicePerfCounterCustom.empty() ) + { + reportMDAPICounters( os ); + } +#endif + + os.close(); + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::getCallLoggingPrefix( + std::string& str ) +{ + if( m_Config.CallLoggingElapsedTime ) + { + uint64_t tickDelta = + OS().GetTimer() - + m_StartTime; + uint64_t usDelta = + OS().TickToNS( tickDelta ) / 1000; + std::ostringstream ss; + + ss << "Time: "; + ss << usDelta; + ss << " "; + + str += ss.str(); + } + + if( m_Config.CallLoggingThreadId || + m_Config.CallLoggingThreadNumber ) + { + uint64_t 
threadId = OS().GetThreadID(); + std::ostringstream ss; + + if( m_Config.CallLoggingThreadId ) + { + ss << "TID = "; + ss << threadId; + ss << " "; + } + if( m_Config.CallLoggingThreadNumber ) + { + unsigned int threadNum = 0; + if( m_ThreadNumberMap.find( threadId ) != m_ThreadNumberMap.end() ) + { + threadNum = m_ThreadNumberMap[ threadId ]; + } + else + { + /* First time this thread is seen: give it the next sequential number, which is the current map size. */ threadNum = (unsigned int)m_ThreadNumberMap.size(); + m_ThreadNumberMap[ threadId ] = threadNum; + } + ss << "TNum = "; + ss << threadNum; + ss << " "; + } + + str += ss.str(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Logs the call-entry line for functionName: the >>>> marker, the call-logging prefix, the function name, the kernel name looked up in m_KernelNameMap when kernel is non-NULL, and the enqueue counter when CallLoggingEnqueueCounter is set. The whole body runs with the OS critical section held. */ void CLIntercept::callLoggingEnter( + const std::string& functionName, + const cl_kernel kernel ) +{ + m_OS.EnterCriticalSection(); + + std::string str; + getCallLoggingPrefix( str ); + + str += functionName; + + if( kernel ) + { + const std::string& kernelName = m_KernelNameMap[ kernel ]; + str += "( "; + str += kernelName; + str += " )"; + } + + if( m_Config.CallLoggingEnqueueCounter ) + { + std::ostringstream ss; + ss << ", EnqueueCounter: "; + ss << m_EnqueueCounter; + str += ss.str(); + } + + log( ">>>> " + str + "\n" ); + + m_OS.LeaveCriticalSection(); +} +/* printf-style overload: appends the optional kernel name and the formatted arguments to functionName, then forwards the combined text to the two-argument overload with a NULL kernel. */ void CLIntercept::callLoggingEnter( + const std::string& functionName, + const cl_kernel kernel, + const char* formatStr, + ...
) +{ + va_list args; + va_start( args, formatStr ); + + std::string str = functionName; + + if( kernel ) + { + /* Only the kernel name lookup is guarded here; the forwarded call below takes the critical section again for the actual logging. */ m_OS.EnterCriticalSection(); + + const std::string& kernelName = m_KernelNameMap[ kernel ]; + str += "( "; + str += kernelName; + str += " )"; + + m_OS.LeaveCriticalSection(); + } + + char temp[ CLI_MAX_STRING_SIZE ] = ""; + int size = CLI_VSPRINTF( temp, CLI_MAX_STRING_SIZE, formatStr, args ); + /* A negative or out-of-range size means the formatted text did not fit in temp, so a fixed placeholder is logged instead. */ if( size >= 0 && size < CLI_MAX_STRING_SIZE ) + { + str += ": "; + str += temp; + } + else + { + str += ": too long"; + } + callLoggingEnter( str, NULL ); + + va_end( args ); +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Logs str as an informational line, marked with ----, while holding the OS critical section. */ void CLIntercept::callLoggingInfo( + const std::string& str ) +{ + m_OS.EnterCriticalSection(); + + log( "---- " + str + "\n" ); + + m_OS.LeaveCriticalSection(); +} + +/* printf-style overload: formats the arguments into a CLI_MAX_STRING_SIZE buffer and forwards the result to the string overload, or forwards the text too long when formatting fails or does not fit. */ void CLIntercept::callLoggingInfo( + const char* formatStr, + ... 
) +{ + va_list args; + va_start( args, formatStr ); + + char temp[ CLI_MAX_STRING_SIZE ] = ""; + int size = CLI_VSPRINTF( temp, CLI_MAX_STRING_SIZE, formatStr, args ); + if( size >= 0 && size < CLI_MAX_STRING_SIZE ) + { + callLoggingInfo( std::string( temp ) ); + } + else + { + callLoggingInfo( std::string( "too long" ) ); + } + + va_end( args ); +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Logs the call-exit line for functionName: the <<<< marker, the call-logging prefix, the function name, the kernel name when kernel is non-NULL, and the value of *event when event is non-NULL. The whole body runs with the OS critical section held. */ void CLIntercept::callLoggingExit( + const std::string& functionName, + const cl_kernel kernel, + const cl_event* event ) +{ + m_OS.EnterCriticalSection(); + + std::string str; + getCallLoggingPrefix( str ); + + str += functionName; + + if( kernel ) + { + const std::string& kernelName = m_KernelNameMap[ kernel ]; + str += "( "; + str += kernelName; + str += " )"; + } + + if( event ) + { + char temp[ CLI_MAX_STRING_SIZE ] = ""; + CLI_SPRINTF( temp, CLI_MAX_STRING_SIZE, " created event = %p", *event ); + str += temp; + } + + log( "<<<< " + str + "\n" ); + + m_OS.LeaveCriticalSection(); +} +void CLIntercept::callLoggingExit( + const std::string&
functionName, + const cl_kernel kernel, + const cl_event* event, + const char* formatStr, + ... ) +{ + va_list args; + va_start( args, formatStr ); + + std::string str = functionName; + + if( kernel ) + { + m_OS.EnterCriticalSection(); + + const std::string& kernelName = m_KernelNameMap[ kernel ]; + str += "( "; + str += kernelName; + str += " )"; + + m_OS.LeaveCriticalSection(); + } + + /* temp is used twice: first for the event text, then for the formatted arguments. */ char temp[ CLI_MAX_STRING_SIZE ] = ""; + + if( event ) + { + CLI_SPRINTF( temp, CLI_MAX_STRING_SIZE, " created event = %p", *event ); + str += temp; + } + + int size = CLI_VSPRINTF( temp, CLI_MAX_STRING_SIZE, formatStr, args ); + if( size >= 0 && size < CLI_MAX_STRING_SIZE ) + { + str += ": "; + str += temp; + } + else + { + str += ": too long"; + } + + /* The kernel and event text is already part of str, so the forwarded call passes NULL for both. */ callLoggingExit( str, NULL, NULL ); + + va_end( args ); +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Fetches a platform info value into a freshly allocated buffer: queries the required size, allocates param_value with new[], then queries the value. param_value is expected to be NULL on entry; a non-NULL value trips CLI_ASSERT and is deleted first. On success the caller owns param_value and releases it with delete []. On any failure the buffer is deleted, param_value is reset to NULL and the error code is returned. 
*/ cl_int CLIntercept::allocateAndGetPlatformInfoString( + cl_platform_id platform, + cl_platform_info param_name, + char*& param_value ) const +{ + cl_int errorCode = CL_SUCCESS; + size_t size = 0; + + if( errorCode == CL_SUCCESS ) + { + if( param_value != NULL ) + { + CLI_ASSERT( 0 ); + delete [] param_value; + param_value = NULL; + } + } + + if( errorCode == CL_SUCCESS ) + { + /* First call: size query only. */ errorCode = dispatch().clGetPlatformInfo( + platform, + param_name, + 0, + NULL, + &size ); + } + + if( errorCode == CL_SUCCESS ) + { + param_value = new char[ size ]; + if( param_value == NULL ) + { + errorCode = CL_OUT_OF_HOST_MEMORY; + } + } + + if( errorCode == CL_SUCCESS ) + { + /* Second call: fetch the value into the buffer. */ errorCode = dispatch().clGetPlatformInfo( + platform, + param_name, + size, + param_value, + NULL ); + } + + if( errorCode != CL_SUCCESS ) + { + delete [] param_value; + param_value = NULL; + } + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Device counterpart of allocateAndGetPlatformInfoString: same size-query, allocate, fetch sequence using clGetDeviceInfo, with the same ownership rules for param_value. */ cl_int CLIntercept::allocateAndGetDeviceInfoString( + cl_device_id device, + cl_device_info param_name, + char*& param_value ) const
+{ + cl_int errorCode = CL_SUCCESS; + size_t size = 0; + + if( errorCode == CL_SUCCESS ) + { + if( param_value != NULL ) + { + /* The caller is expected to pass a NULL pointer; a stale buffer is released before it is overwritten. */ CLI_ASSERT( 0 ); + delete [] param_value; + param_value = NULL; + } + } + + if( errorCode == CL_SUCCESS ) + { + /* First call: size query only. */ errorCode = dispatch().clGetDeviceInfo( + device, + param_name, + 0, + NULL, + &size ); + } + + if( errorCode == CL_SUCCESS ) + { + param_value = new char[ size ]; + if( param_value == NULL ) + { + errorCode = CL_OUT_OF_HOST_MEMORY; + } + } + + if( errorCode == CL_SUCCESS ) + { + /* Second call: fetch the value into the buffer. */ errorCode = dispatch().clGetDeviceInfo( + device, + param_name, + size, + param_value, + NULL ); + } + + if( errorCode != CL_SUCCESS ) + { + delete [] param_value; + param_value = NULL; + } + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Replaces str with the CL_PLATFORM_NAME of platform, or with the text ERROR (after tripping CLI_ASSERT) when the name cannot be queried. The temporary name buffer is released before returning. */ void CLIntercept::getPlatformInfoString( + const cl_platform_id platform, + std::string& str ) const +{ + str = ""; + + cl_int errorCode = CL_SUCCESS; + + char* platformName = NULL; + + errorCode |= allocateAndGetPlatformInfoString( + platform, + CL_PLATFORM_NAME, + platformName ); + + if( errorCode != CL_SUCCESS ) + { + CLI_ASSERT( 0 ); + str += "ERROR"; + } + else + { + str += platformName; + } + + delete [] platformName; + platformName = NULL; +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Replaces str with a description of each of the numDevices devices: the device name followed by its device type in parentheses, with entries after the first separated by a vertical bar. A device whose queries fail contributes the text ERROR instead. 
*/ void CLIntercept::getDeviceInfoString( + cl_uint numDevices, + const cl_device_id* devices, + std::string& str ) const +{ + str = ""; + + unsigned int i = 0; + for( i = 0; i < numDevices; i++ ) + { + cl_int errorCode = CL_SUCCESS; + + cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT; + char* deviceName = NULL; + + /* The two results are OR-ed together, so errorCode is only meaningful as success versus failure below. */ errorCode |= dispatch().clGetDeviceInfo( + devices[i], + CL_DEVICE_TYPE, + sizeof( deviceType ), + &deviceType, + NULL ); + errorCode |= allocateAndGetDeviceInfoString( + devices[i], + CL_DEVICE_NAME, + deviceName ); + + if( errorCode != CL_SUCCESS ) + { + CLI_ASSERT( 0 ); + str += "ERROR"; + } + else + { + if( i
!= 0 ) + { + str += " | "; + } + + if( deviceName ) + { + str += deviceName; + } + str += " ("; + str += enumName().name_device_type( deviceType ); + str += ")"; + } + + delete [] deviceName; + deviceName = NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Appends a description of an event list to str (str is not cleared first): the element count followed by the comma-separated event handles printed with %p. A NULL eventList is treated as having no printable handles, so only the count and empty brackets are appended. */ void CLIntercept::getEventListString( + cl_uint numEvents, + const cl_event* eventList, + std::string& str ) const +{ + { + std::ostringstream ss; + ss << "( size = "; + ss << numEvents; + ss << " )[ "; + str += ss.str(); + } + unsigned int i = 0; + /* Stop immediately when the application passed a count without a list, rather than dereferencing NULL while logging. */ for( i = 0; ( eventList != NULL ) && ( i < numEvents ); i++ ) + { + /* Separator before every handle except the first; this is a plain comparison, matching getDeviceInfoString. */ if( i != 0 ) + { + str += ", "; + } + { + char temp[ CLI_MAX_STRING_SIZE ] = ""; + CLI_SPRINTF( temp, CLI_MAX_STRING_SIZE, "%p", eventList[i] ); + str += temp; + } + } + str += " ]"; +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Replaces str with a comma-separated NAME = value rendering of a zero-terminated list of context property pairs. Platform properties are shown by platform name, the GL/EGL/GLX/WGL/CGL sharing properties as pointers, and CL_CONTEXT_INTEROP_USER_SYNC as a boolean name; other properties show the name only. 
*/ void CLIntercept::getContextPropertiesString( + const cl_context_properties* properties, + std::string& str ) const +{ + str = ""; + + if( properties ) + { + while( properties[0] != 0 ) + { + char temp_str[ CLI_MAX_STRING_SIZE ]; + + cl_int property = (cl_int)properties[0]; + str += enumName().name( property ) + " = "; + + switch( property ) + { + case CL_CONTEXT_PLATFORM: + { + const cl_platform_id* pp = (const cl_platform_id*)( properties + 1 ); + const cl_platform_id platform = pp[0]; + std::string platformInfo; + getPlatformInfoString( platform, platformInfo ); + str += platformInfo; + } + break; + case CL_GL_CONTEXT_KHR: + case CL_EGL_DISPLAY_KHR: + case CL_GLX_DISPLAY_KHR: + case CL_WGL_HDC_KHR: + case CL_CGL_SHAREGROUP_KHR: + { + const void** pp = (const void**)( properties + 1 ); + const void* value = pp[0]; + CLI_SPRINTF( temp_str, CLI_MAX_STRING_SIZE, "%p", value ); + str += temp_str; + } + break; + case CL_CONTEXT_INTEROP_USER_SYNC: + { + const cl_bool* pb = (const cl_bool*)( properties + 1); + cl_bool value = pb[0]; + str += enumName().name_bool( value ); + } + break; + default: + str += ""; +
break; + } + + /* Properties come in name/value pairs, so step over both entries. */ properties += 2; + if( properties[0] != 0 ) + { + str += ", "; + } + } + } + else + { + str = "NULL"; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Replaces str with a comma-separated NAME = value rendering of a zero-terminated list of sampler property pairs, or with the text NULL when properties is NULL. Normalized-coords is shown as a boolean name, the addressing and filter modes as enum names, and the LOD bounds as floats with two decimals; other properties show the name only. */ void CLIntercept::getSamplerPropertiesString( + const cl_sampler_properties* properties, + std::string& str ) const +{ + str = ""; + + if( properties ) + { + while( properties[0] != 0 ) + { + cl_int property = (cl_int)properties[0]; + str += enumName().name( property ) + " = "; + + switch( property ) + { + case CL_SAMPLER_NORMALIZED_COORDS: + { + /* The value slot is reinterpreted in place as the type the property carries. 
*/ const cl_bool* pb = (const cl_bool*)( properties + 1); + cl_bool value = pb[0]; + str += enumName().name_bool( value ); + } + break; + case CL_SAMPLER_ADDRESSING_MODE: + case CL_SAMPLER_FILTER_MODE: + case CL_SAMPLER_MIP_FILTER_MODE: + { + const cl_int* pi = (const cl_int*)( properties + 1); + cl_int value = pi[0]; + str += enumName().name( value ); + } + break; + case CL_SAMPLER_LOD_MIN: + case CL_SAMPLER_LOD_MAX: + { +#if 0 + if( property == CL_SAMPLER_LOD_MAX ) + { + cl_float* pFixup = (cl_float*)( properties + 1); + if( pFixup[0] < 0.5f ) + { + pFixup[0] = 100.0f; + } + } +#endif + + const cl_float* pf = (const cl_float*)( properties + 1 ); + + cl_float value = pf[0]; + + char fstr[ CLI_MAX_STRING_SIZE ]; + CLI_SPRINTF( fstr, CLI_MAX_STRING_SIZE, "%.2f", value ); + str += fstr; + } + break; + default: + str += ""; + break; + } + + properties += 2; + if( properties[0] != 0 ) + { + str += ", "; + } + } + } + else + { + str = "NULL"; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Replaces str with a comma-separated rendering of a zero-terminated list of command queue property pairs. NOTE(review): unlike the context and sampler variants there is no else branch, so a NULL properties pointer leaves str empty rather than set to NULL. */ void CLIntercept::getCommandQueuePropertiesString( + const cl_queue_properties* properties, + std::string& str ) const +{ + str = ""; + + if( properties ) + { + while( properties[0] != 0 ) + { + cl_int property = (cl_int)properties[0]; + str += enumName().name( property ) + " = "; + + switch( property ) + { + case CL_QUEUE_PROPERTIES: + { + str += ""; + } + break; + case CL_QUEUE_SIZE: + { +
const cl_uint* pu = (const cl_uint*)( properties + 1); + cl_uint value = pu[0]; + /* Format the queue size as decimal text. Appending the integer directly would select the single-character overload of operator+= and add one character with that code instead of the number. */ std::ostringstream ss; + ss << value; + str += ss.str(); + } + break; + default: + str += ""; + break; + } + + /* Properties come in name/value pairs, so step over both entries. */ properties += 2; + if( properties[0] != 0 ) + { + str += ", "; + } + } + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Appends the list of created kernel handles to str, printed with %p and separated by commas. Nothing is appended unless kernels and num_kernels_ret are both non-NULL and the returned count is non-zero. retVal is not used. */ void CLIntercept::getCreateKernelsInProgramRetString( + cl_int retVal, + cl_kernel* kernels, + cl_uint* num_kernels_ret, + std::string& str ) const +{ + if( kernels && + num_kernels_ret && + ( num_kernels_ret[0] != 0 ) ) + { + cl_uint numKernels = num_kernels_ret[0]; + + str += "kernels = [ "; + for( cl_uint i = 0; i < numKernels; i++ ) + { + char s[256]; + CLI_SPRINTF( s, 256, "%p", kernels[i] ); + str += s; + + if( i < numKernels - 1 ) + { + str += ", "; + } + } + str += " ]"; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Replaces str with a description of one kernel argument: its index and size, plus its value when the argument is recognised as a sampler by getSampler, or when arg_value is non-NULL and arg_size equals the size of cl_mem, cl_uint, cl_ulong or cl_int4. The type is inferred from the size alone, in that order. 
*/ void CLIntercept::getKernelArgString( + cl_uint arg_index, + size_t arg_size, + const void* arg_value, + std::string& str ) const +{ + char s[CLI_MAX_STRING_SIZE] = ""; + + if( getSampler( + arg_size, + arg_value, + str ) ) + { + CLI_SPRINTF( s, CLI_MAX_STRING_SIZE, "index = %d, size = %d, value = %s\n", + arg_index, + (unsigned int)arg_size, + str.c_str() ); + } + else if( ( arg_value != NULL ) && + ( arg_size == sizeof(cl_mem) ) ) + { + cl_mem* pMem = (cl_mem*)arg_value; + CLI_SPRINTF( s, CLI_MAX_STRING_SIZE, "index = %d, size = %d, value = %p", + arg_index, + (unsigned int)arg_size, + pMem[0] ); + } + else if( ( arg_value != NULL ) && + ( arg_size == sizeof(cl_uint) ) ) + { + cl_uint* pData = (cl_uint*)arg_value; + CLI_SPRINTF( s, CLI_MAX_STRING_SIZE, "index = %d, size = %d, value = 0x%x", + arg_index, + (unsigned int)arg_size, + pData[0] ); + } + else if( ( arg_value != NULL ) && + ( arg_size == sizeof(cl_ulong) ) ) + { + cl_ulong* pData = (cl_ulong*)arg_value; + CLI_SPRINTF( s, CLI_MAX_STRING_SIZE, "index = %d, size = %d, value = 0x%jx", + arg_index, + (unsigned int)arg_size, +
pData[0] ); + } + else if( ( arg_value != NULL ) && + ( arg_size == sizeof(cl_int4) ) ) + { + cl_int4* pData = (cl_int4*)arg_value; + CLI_SPRINTF( s, CLI_MAX_STRING_SIZE, "index = %d, size = %d, valueX = 0x%0x, valueY = 0x%0x, valueZ = 0x%0x, valueW = 0x%0x", + arg_index, + (unsigned int)arg_size, + pData->s[0], + pData->s[1], + pData->s[2], + pData->s[3]); + } + else + { + /* Unknown size or NULL value: report the index and size only. */ CLI_SPRINTF( s, CLI_MAX_STRING_SIZE, "index = %d, size = %d", + arg_index, + (unsigned int)arg_size ); + } + + str = s; +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Replaces str with the ND-range arguments of a kernel enqueue, each printed as work_dim comma-separated values between angle brackets. global_work_offset is omitted entirely when NULL; a NULL global_work_size prints NULL? and a NULL local_work_size prints NULL. */ void CLIntercept::getEnqueueNDRangeKernelArgsString( + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + std::string& str ) const +{ + std::ostringstream ss; + + if( global_work_offset ) + { + ss << "global_work_offset = < "; + for( cl_uint i = 0; i < work_dim; i++ ) + { + ss << global_work_offset[i]; + if( i < work_dim - 1 ) + { + ss << ", "; + } + } + ss << " >, "; + } + + ss << "global_work_size = < "; + if( global_work_size ) + { + for( cl_uint i = 0; i < work_dim; i++ ) + { + ss << global_work_size[i]; + if( i < work_dim - 1 ) + { + ss << ", "; + } + } + } + else + { + ss << "NULL?"; + } + ss << " >, "; + + ss << "local_work_size = < "; + if( local_work_size ) + { + for( cl_uint i = 0; i < work_dim; i++ ) + { + ss << local_work_size[i]; + if( i < work_dim - 1 ) + { + ss << ", "; + } + } + } + else + { + ss << "NULL"; + } + ss << " >"; + + str = ss.str(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Replaces str with the sub-buffer creation arguments: 
the origin and size of the region for CL_BUFFER_CREATE_TYPE_REGION, and an empty string for any other create type. NOTE(review): createInfo is dereferenced without a NULL check in the region case. */ void CLIntercept::getCreateSubBufferArgsString( + cl_buffer_create_type createType, + const void *createInfo, + std::string& str ) const +{ + std::ostringstream ss; + + switch( createType ) + { + case CL_BUFFER_CREATE_TYPE_REGION: + { + cl_buffer_region* pRegion = (cl_buffer_region*)createInfo; + ss << "origin = " + << pRegion->origin + << " size = " + << pRegion->size; +
} + break; + default: + ss << ""; + break; + } + + str = ss.str(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::logCLInfo() +{ + if( m_LoggedCLInfo == false ) + { + m_OS.EnterCriticalSection(); + + if( m_LoggedCLInfo == false ) + { + m_LoggedCLInfo = true; + + cl_int errorCode = CL_SUCCESS; + cl_uint numPlatforms = 0; + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetPlatformIDs( + 0, + NULL, + &numPlatforms ); + } + + if( errorCode == CL_SUCCESS && numPlatforms != 0 ) + { + logf( "\nEnumerated %u platform%s.\n\n", + numPlatforms, + numPlatforms > 1 ? "s" : "" ); + + cl_platform_id* platforms = new cl_platform_id[numPlatforms]; + if( platforms ) + { + errorCode = dispatch().clGetPlatformIDs( + numPlatforms, + platforms, + NULL ); + } + else + { + errorCode = CL_OUT_OF_HOST_MEMORY; + } + + for( cl_uint p = 0; p < numPlatforms; p++ ) + { + if( errorCode == CL_SUCCESS ) + { + logf( "Platform %u:\n", p ); + logPlatformInfo( platforms[p] ); + } + + cl_uint numDevices = 0; + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetDeviceIDs( + platforms[p], + CL_DEVICE_TYPE_ALL, + 0, + NULL, + &numDevices ); + } + if( errorCode == CL_SUCCESS && numDevices != 0 ) + { + logf( "\tPlatform has %u device%s.\n\n", + numDevices, + numDevices > 1 ? 
"s" : "" ); + + cl_device_id* devices = new cl_device_id[numDevices]; + if( devices ) + { + errorCode = dispatch().clGetDeviceIDs( + platforms[p], + CL_DEVICE_TYPE_ALL, + numDevices, + devices, + NULL ); + } + else + { + errorCode = CL_OUT_OF_HOST_MEMORY; + } + + for( cl_uint d = 0; d < numDevices; d++ ) + { + if( errorCode == CL_SUCCESS ) + { + logf( "Device %u:\n", d ); + logDeviceInfo( devices[d] ); + log( "\n" ); + } + } + + delete [] devices; + } + } + + delete [] platforms; + } + } + + m_OS.LeaveCriticalSection(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::logBuild( + uint64_t buildTimeStart, + const cl_program program, + cl_uint numDevices, + const cl_device_id* deviceList ) +{ + uint64_t buildTimeEnd = m_OS.GetTimer(); + + m_OS.EnterCriticalSection(); + + cl_device_id* localDeviceList = NULL; + + cl_int errorCode = CL_SUCCESS; + + // There are two possibilities. Either the device_list is NULL, in which + // case we need to get the build log for all devices, or it's non-NULL, + // in which case we only need to get the build log for all devices in + // the device list. 
+ + if( ( errorCode == CL_SUCCESS ) && + ( deviceList == NULL ) ) + { + errorCode = dispatch().clGetProgramInfo( + program, + CL_PROGRAM_NUM_DEVICES, + sizeof( numDevices ), + &numDevices, + NULL ); + + if( errorCode == CL_SUCCESS ) + { + localDeviceList = new cl_device_id[ numDevices ]; + if( localDeviceList == NULL ) + { + errorCode = CL_OUT_OF_HOST_MEMORY; + } + else + { + errorCode = dispatch().clGetProgramInfo( + program, + CL_PROGRAM_DEVICES, + numDevices * sizeof( cl_device_id ), + localDeviceList, + NULL ); + if( errorCode == CL_SUCCESS ) + { + deviceList = localDeviceList; + } + } + } + } + + if( m_Config.BuildLogging && + errorCode == CL_SUCCESS ) + { + unsigned int programNumber = m_ProgramNumberMap[ program ]; + unsigned int compileCount = m_ProgramNumberCompileCountMap[ programNumber ]; + + logf( "Build Info for program %p, number %u, compile %u, for %u device(s):\n", + program, + programNumber, + compileCount, + numDevices ); + + float buildTimeMS = m_OS.TickToNS( buildTimeEnd - buildTimeStart ) / 1e6f; + logf( " Build finished in %.2f ms.\n", buildTimeMS ); + } + + if( errorCode == CL_SUCCESS ) + { + size_t i = 0; + for( i = 0; i < numDevices; i++ ) + { + if( m_Config.BuildLogging ) + { + cl_build_status buildStatus = CL_BUILD_NONE; + errorCode = dispatch().clGetProgramBuildInfo( + program, + deviceList[ i ], + CL_PROGRAM_BUILD_STATUS, + sizeof( buildStatus ), + &buildStatus, + NULL ); + + if( errorCode == CL_SUCCESS ) + { + char* deviceName = NULL; + char* deviceOpenCLCVersion = NULL; + errorCode = allocateAndGetDeviceInfoString( + deviceList[i], + CL_DEVICE_NAME, + deviceName ); + errorCode |= allocateAndGetDeviceInfoString( + deviceList[i], + CL_DEVICE_OPENCL_C_VERSION, + deviceOpenCLCVersion ); + + char str[256] = ""; + + CLI_SPRINTF( str, 256, "Build Status for device %u = ", + (unsigned int)i ); + + std::string message = str; + + if( errorCode == CL_SUCCESS ) + { + message += deviceName; + message += " ("; + message += deviceOpenCLCVersion; + 
message += "): "; + } + + message += enumName().name_build_status( buildStatus ); + message += "\n"; + + log( message ); + + delete [] deviceName; + deviceName = NULL; + + delete [] deviceOpenCLCVersion; + deviceOpenCLCVersion = NULL; + } + } + + /* The build log is fetched even when BuildLogging is off, because DumpProgramBuildLogs may still need it. First call: size query only. */ size_t buildLogSize = 0; + errorCode = dispatch().clGetProgramBuildInfo( + program, + deviceList[ i ], + CL_PROGRAM_BUILD_LOG, + 0, + NULL, + &buildLogSize ); + + if( errorCode == CL_SUCCESS ) + { + /* Value-initialise the buffer so it holds an empty, terminated string even if the query below fails or writes nothing. */ char* buildLog = new char[ buildLogSize + 1 ](); + if( buildLog ) + { + dispatch().clGetProgramBuildInfo( + program, + deviceList[ i ], + CL_PROGRAM_BUILD_LOG, + buildLogSize, + buildLog, + NULL ); + + // Check if the build log is already null-terminated. + // If it is, we're good, otherwise null terminate it. + /* A reported size of zero has no last character to inspect; indexing buildLogSize - 1 would wrap around and read far outside the buffer. 
*/ if( ( buildLogSize > 0 ) && ( buildLog[ buildLogSize - 1 ] == '\0' ) ) + { + buildLogSize--; + } + else + { + buildLog[ buildLogSize ] = '\0'; + } + + if( m_Config.BuildLogging ) + { + log( "-------> Start of Build Log:\n" ); + log( std::string(buildLog) ); + log( "<------- End of Build Log\n\n" ); + } + if( m_Config.DumpProgramBuildLogs ) + { + dumpProgramBuildLog( + program, + deviceList[ i ], + buildLog, + buildLogSize ); + } + + delete [] buildLog; + } + } + } + } + + delete [] localDeviceList; + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +/* Logs an error line naming the function that failed, the enum name of errorCode and its numeric value. The message is built first; only the log call itself runs inside the OS critical section. */ void CLIntercept::logError( + const std::string& functionName, + cl_int errorCode ) +{ + std::ostringstream ss; + ss << "ERROR!
" << functionName << " returned " << enumName().name(errorCode) << " (" << errorCode << ")\n"; + + m_OS.EnterCriticalSection(); + + log( ss.str() ); + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::logFlushOrFinishAfterEnqueueStart( + const std::string& flushOrFinish, + const std::string& functionName ) +{ + m_OS.EnterCriticalSection(); + + log( "Calling " + flushOrFinish + " after " + functionName + "...\n" ); + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::logFlushOrFinishAfterEnqueueEnd( + const std::string& flushOrFinish, + const std::string& functionName, + cl_int errorCode ) +{ + std::ostringstream ss; + ss << "... " << flushOrFinish << " after " << functionName << " returned " << enumName().name( errorCode ) << " (" << errorCode << ")\n"; + + m_OS.EnterCriticalSection(); + + log( ss.str() ); + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::logPreferredWorkGroupSizeMultiple( + const cl_kernel* kernels, + cl_uint numKernels ) +{ + if( numKernels > 0 ) + { + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + // We can share the program and device list for all kernels. + + cl_kernel queryKernel = kernels[0]; + + // First, get the program for this kernel. + cl_program program = NULL; + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetKernelInfo( + queryKernel, + CL_KERNEL_PROGRAM, + sizeof(program), + &program, + NULL ); + } + + // Next, get the list of devices for the program. 
+ cl_uint numDevices = 0; + cl_device_id* deviceList = NULL; + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetProgramInfo( + program, + CL_PROGRAM_NUM_DEVICES, + sizeof( numDevices ), + &numDevices, + NULL ); + + if( errorCode == CL_SUCCESS ) + { + deviceList = new cl_device_id[ numDevices ]; + if( deviceList == NULL ) + { + errorCode = CL_OUT_OF_HOST_MEMORY; + } + else + { + errorCode = dispatch().clGetProgramInfo( + program, + CL_PROGRAM_DEVICES, + numDevices * sizeof( cl_device_id ), + deviceList, + NULL ); + } + } + } + + // Log the preferred work group size multiple for each kernel, + // for each device. + while( numKernels-- ) + { + cl_kernel kernel = kernels[ numKernels ]; + + if( errorCode == CL_SUCCESS ) + { + const std::string& kernelName = m_KernelNameMap[ kernel ]; + log( "Preferred Work Group Size Multiple for: '" + kernelName + "':\n" ); + } + if( errorCode == CL_SUCCESS ) + { + size_t i = 0; + for( i = 0; i < numDevices; i++ ) + { + size_t kernelPreferredWorkGroupSizeMultiple = 0; + errorCode = dispatch().clGetKernelWorkGroupInfo( + kernel, + deviceList[i], + CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + sizeof(kernelPreferredWorkGroupSizeMultiple), + &kernelPreferredWorkGroupSizeMultiple, + NULL ); + if( errorCode == CL_SUCCESS ) + { + char* deviceName = NULL; + + errorCode = allocateAndGetDeviceInfoString( + deviceList[i], + CL_DEVICE_NAME, + deviceName ); + if( errorCode == CL_SUCCESS ) + { + logf( " for device %s: %u\n", + deviceName, + (unsigned int)kernelPreferredWorkGroupSizeMultiple ); + } + + delete [] deviceName; + } + } + } + } + + delete [] deviceList; + + m_OS.LeaveCriticalSection(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::contextCallbackCaller( + const char* errinfo, + const void* private_info, + size_t cb, + void* user_data ) +{ + SContextCallbackInfo* pContextCallbackInfo = + (SContextCallbackInfo*)user_data; + + 
pContextCallbackInfo->pIntercept->contextCallback( + errinfo, + private_info, + cb ); + if( pContextCallbackInfo->pApplicationCallback ) + { + pContextCallbackInfo->pApplicationCallback( + errinfo, + private_info, + cb, + pContextCallbackInfo->pUserData ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::contextCallback( + const std::string& errinfo, + const void* private_info, + size_t cb ) +{ + m_OS.EnterCriticalSection(); + + char str[256] = ""; + CLI_SPRINTF( str, 256, "=======> Context Callback (private_info = %p, cb = %u):\n", + private_info, + (unsigned int)cb ); + + log( str + errinfo + "\n" + "<======= End of Context Callback\n" ); + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::contextCallbackOverrideInit( + const cl_context_properties* properties, + void (CL_CALLBACK*& pCallback)( const char*, const void*, size_t, void* ), + void*& pUserData, + SContextCallbackInfo*& pContextCallbackInfo, + cl_context_properties*& pLocalContextProperties ) +{ + pContextCallbackInfo = new SContextCallbackInfo; + if( pContextCallbackInfo ) + { + pContextCallbackInfo->pIntercept = this; + pContextCallbackInfo->pApplicationCallback = pCallback; + pContextCallbackInfo->pUserData = pUserData; + + pCallback = CLIntercept::contextCallbackCaller; + pUserData = pContextCallbackInfo; + } + + if( m_Config.ContextHintLevel ) + { + // We want to add a context hints to the context properties, unless + // the context properties already requests performance hints + // (requesting the same property twice is an error). So, look through + // the context properties for the performance hint enum. We need to + // do this anyways to count the number of property pairs. 
+ bool foundPerformanceHintEnum = false; + int numProperties = 0; + if( properties ) + { + while( properties[ numProperties ] != 0 ) + { + if( properties[ numProperties ] == CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL ) + { + foundPerformanceHintEnum = true; + } + numProperties += 2; + } + } + + if( foundPerformanceHintEnum == false ) + { + // The performance hint property isn't already set, so we'll + // need to allocate an extra pair of properties for it. + numProperties += 2; + } + + // Allocate a new array of properties. We need to allocate two + // properties for each pair, plus one property for the terminating + // zero. + pLocalContextProperties = new cl_context_properties[ numProperties + 1 ]; + if( pLocalContextProperties ) + { + // Copy the old properties array to the new properties array, + // if the new properties array exists. + numProperties = 0; + if( properties ) + { + while( properties[ numProperties ] != 0 ) + { + pLocalContextProperties[ numProperties ] = properties[ numProperties ]; + if( properties[ numProperties ] == CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL ) + { + CLI_ASSERT( foundPerformanceHintEnum ); + pLocalContextProperties[ numProperties + 1 ] = m_Config.ContextHintLevel; + } + else + { + pLocalContextProperties[ numProperties + 1 ] = properties[ numProperties + 1 ]; + } + numProperties += 2; + } + } + // Add the performance hint property if it wasn't already set. + if( foundPerformanceHintEnum == false ) + { + pLocalContextProperties[ numProperties ] = CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL; + pLocalContextProperties[ numProperties + 1 ] = m_Config.ContextHintLevel; + numProperties += 2; + } + // Add the terminating zero. 
+ pLocalContextProperties[ numProperties ] = 0; + } + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::contextCallbackOverrideCleanup( + const cl_context context, + SContextCallbackInfo*& pContextCallbackInfo, + cl_context_properties*& pLocalContextProperties ) +{ + if( context && pContextCallbackInfo ) + { + m_OS.EnterCriticalSection(); + + // Check if we already have a context callback info for this context. If + // we do, free it. + SContextCallbackInfo* pOldContextCallbackInfo = + m_ContextCallbackInfoMap[ context ]; + if( pOldContextCallbackInfo ) + { + delete pOldContextCallbackInfo; + pOldContextCallbackInfo = NULL; + } + + m_ContextCallbackInfoMap[ context ] = pContextCallbackInfo; + + m_OS.LeaveCriticalSection(); + } + else + { + delete pContextCallbackInfo; + pContextCallbackInfo = NULL; + } + + if( pLocalContextProperties ) + { + delete pLocalContextProperties; + pLocalContextProperties = NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::eventCallbackCaller( + cl_event event, + cl_int status, + void* user_data ) +{ + SEventCallbackInfo* pEventCallbackInfo = + (SEventCallbackInfo*)user_data; + + CLIntercept* pIntercept = pEventCallbackInfo->pIntercept; + + CALL_LOGGING_ENTER( "event = %p, status = %s (%d)", + event, + pIntercept->enumName().name_command_exec_status( status ).c_str(), + status ); + + pIntercept->eventCallback( + event, + status ); + if( pEventCallbackInfo->pApplicationCallback ) + { + pEventCallbackInfo->pApplicationCallback( + event, + status, + pEventCallbackInfo->pUserData ); + } + + CALL_LOGGING_EXIT(); + + delete pEventCallbackInfo; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::eventCallback( + cl_event event, + int status ) +{ + // TODO: Since we call log the eventCallbackCaller, do we need to do + // anything here? 
+} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::incrementEnqueueCounter() +{ + m_OS.EnterCriticalSection(); + + m_EnqueueCounter++; + + m_OS.LeaveCriticalSection(); +} + +uint64_t CLIntercept::getEnqueueCounter() +{ + return m_EnqueueCounter; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::overrideNullLocalWorkSize( + const cl_uint work_dim, + const size_t* global_work_size, + const size_t*& local_work_size ) +{ + if( local_work_size == NULL ) + { + switch( work_dim ) + { + case 1: + if( m_Config.NullLocalWorkSizeX != 0 ) + { + if( global_work_size[0] % m_Config.NullLocalWorkSizeX == 0 ) + { + local_work_size = &m_Config.NullLocalWorkSizeX; + } + else + { + m_OS.EnterCriticalSection(); + logf( "Couldn't override NULL local work size: < %u > %% < %u > != 0!\n", + (unsigned int)global_work_size[0], + (unsigned int)m_Config.NullLocalWorkSizeX ); + m_OS.LeaveCriticalSection(); + } + } + break; + case 2: + if( ( m_Config.NullLocalWorkSizeX != 0 ) && + ( m_Config.NullLocalWorkSizeY != 0 ) ) + { + if( ( global_work_size[0] % m_Config.NullLocalWorkSizeX == 0 ) && + ( global_work_size[1] % m_Config.NullLocalWorkSizeY == 0 ) ) + { + local_work_size = &m_Config.NullLocalWorkSizeX; + } + else + { + m_OS.EnterCriticalSection(); + logf( "Couldn't override NULL local work size: < %u, %u > %% < %u, %u > != 0!\n", + (unsigned int)global_work_size[0], + (unsigned int)global_work_size[1], + (unsigned int)m_Config.NullLocalWorkSizeX, + (unsigned int)m_Config.NullLocalWorkSizeY ); + m_OS.LeaveCriticalSection(); + } + } + break; + case 3: + if( ( m_Config.NullLocalWorkSizeX != 0 ) && + ( m_Config.NullLocalWorkSizeY != 0 ) && + ( m_Config.NullLocalWorkSizeZ != 0 ) ) + { + if( ( global_work_size[0] % m_Config.NullLocalWorkSizeX == 0 ) && + ( global_work_size[1] % m_Config.NullLocalWorkSizeY == 0 ) && + ( global_work_size[2] % m_Config.NullLocalWorkSizeZ == 0 ) ) + { + 
local_work_size = &m_Config.NullLocalWorkSizeX; + } + else + { + m_OS.EnterCriticalSection(); + logf( "Couldn't override NULL local work size: < %u, %u, %u > %% < %u, %u, %u > != 0!\n", + (unsigned int)global_work_size[0], + (unsigned int)global_work_size[1], + (unsigned int)global_work_size[2], + (unsigned int)m_Config.NullLocalWorkSizeX, + (unsigned int)m_Config.NullLocalWorkSizeY, + (unsigned int)m_Config.NullLocalWorkSizeZ ); + m_OS.LeaveCriticalSection(); + } + } + break; + default: + // Nothing. + break; + } + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::combineProgramStrings( + cl_uint& count, + const char**& strings, + const size_t*& lengths, + char*& singleString ) const +{ + size_t allocSize = 0; + cl_uint i = 0; + + for( i = 0; i < count; i++ ) + { + size_t length = 0; + if( ( lengths == NULL ) || + ( lengths[i] == 0 ) ) + { + length = strlen( strings[i] ); + } + else + { + length = lengths[i]; + } + allocSize += length; + } + + // Allocate a multiple of four bytes. + // Allocate some extra to make sure we're null terminated. + allocSize = ( allocSize + ( 4 + 4 - 1 ) ) & ~( 4 - 1 ); + + singleString = new char[ allocSize ]; + if( singleString ) + { + memset( singleString, 0, allocSize ); + + char* pDst = singleString; + size_t remaining = allocSize; + for( i = 0; i < count; i++ ) + { + size_t length = 0; + if( ( lengths == NULL ) || + ( lengths[i] == 0 ) ) + { + length = strlen( strings[i] ); + } + else + { + length = lengths[i]; + } + CLI_MEMCPY( + pDst, + remaining, + strings[i], + length ); + pDst += length; + remaining -= length; + } + + // Replace any NULL chars between kernels with spaces. 
+ if( count > 1 ) + { + for( char* pStr = singleString; pStr < pDst - 1; pStr++ ) + { + if( *pStr == 0x0 ) + { + *pStr = 0x20; + } + } + } + + count = 1; + strings = ( const char** )&singleString; + lengths = NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::incrementProgramCompileCount( + const cl_program program ) +{ + m_OS.EnterCriticalSection(); + + unsigned int programNumber = m_ProgramNumberMap[ program ]; + unsigned int compileCount = m_ProgramNumberCompileCountMap[ programNumber ]; + + ++compileCount; + + m_ProgramNumberCompileCountMap[ programNumber ] = compileCount; + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +uint64_t CLIntercept::hashString( + const char* singleString, + size_t length ) +{ + uint64_t hash = 0; + + if( singleString != NULL ) + { + const unsigned int* dwProgramSource = (const unsigned int*)singleString; + size_t dwProgramSize = length; + + dwProgramSize = ( dwProgramSize + ( 4 - 1 ) ) & ~( 4 - 1 ); + dwProgramSize /= 4; + + hash = Hash( + dwProgramSource, + dwProgramSize ); + } + + return hash; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::saveProgramHash( + const cl_program program, + uint64_t hash ) +{ + m_OS.EnterCriticalSection(); + + if( program != NULL ) + { + m_ProgramHashMap[ program ] = hash; + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::injectProgramSource( + const uint64_t hash, + cl_uint& count, + const char**& strings, + const size_t*& lengths, + char*& singleString ) +{ + // We don't expect to get here unless we've combined the app's string(s) + // into a single string and computed a hash from it. 
+ CLI_ASSERT( singleString ); + + m_OS.EnterCriticalSection(); + + bool injected = false; + + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/Inject"; + } + + // Make two candidate filenames. They will have the form: + // CLI___source.cl, or + // CLI__source.cl + { + char numberString1[256] = ""; + CLI_SPRINTF( numberString1, 256, "%04u_%08X", + m_ProgramNumber, + (unsigned int)hash ); + + char numberString2[256] = ""; + CLI_SPRINTF( numberString2, 256, "%08X", + (unsigned int)hash ); + + std::string fileName1; + fileName1 = fileName; + fileName1 += "/CLI_"; + fileName1 += numberString1; + fileName1 += "_source.cl"; + + std::string fileName2; + fileName2 = fileName; + fileName2 += "/CLI_"; + fileName2 += numberString2; + fileName2 += "_source.cl"; + + std::ifstream is; + + is.open( + fileName1.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injecting source file: " + fileName1 + "\n" ); + } + else + { + log( "Injection source file doesn't exist: " + fileName1 + "\n" ); + + is.clear(); + is.open( + fileName2.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injecting source file: " + fileName2 + "\n" ); + } + else + { + log( "Injection source file doesn't exist: " + fileName2 + "\n" ); + } + } + + if( is.good() ) + { + // The file exists. Figure out how big it is. 
+ size_t filesize = 0; + + is.seekg(0, std::ios::end); + filesize = (size_t)is.tellg(); + is.seekg(0, std::ios::beg); + + char* newSingleString = new char[ filesize + 1 ]; + if( newSingleString ) + { + memset( newSingleString, 0, filesize + 1 ); + + is.read( newSingleString, filesize ); + + delete [] singleString; + + singleString = newSingleString; + count = 1; + strings = ( const char** )&singleString; + lengths = NULL; + + injected = true; + } + + is.close(); + } + } + + m_OS.LeaveCriticalSection(); + return injected; +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::prependProgramSource( + const uint64_t hash, + cl_uint& count, + const char**& strings, + const size_t*& lengths, + char*& singleString ) +{ + // We don't expect to get here unless we've combined the app's string(s) + // into a single string and computed a hash from it. + CLI_ASSERT( singleString ); + + m_OS.EnterCriticalSection(); + + bool injected = false; + + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/Inject"; + } + + // Make three candidate filenames. 
They will have the form: + // CLI___prepend.cl, or + // CLI__prepend.cl, or + // CLI_prepend.cl + { + char numberString1[256] = ""; + CLI_SPRINTF( numberString1, 256, "%04u_%08X", + m_ProgramNumber, + (unsigned int)hash ); + + char numberString2[256] = ""; + CLI_SPRINTF( numberString2, 256, "%08X", + (unsigned int)hash ); + + std::string fileName1; + fileName1 = fileName; + fileName1 += "/CLI_"; + fileName1 += numberString1; + fileName1 += "_prepend.cl"; + + std::string fileName2; + fileName2 = fileName; + fileName2 += "/CLI_"; + fileName2 += numberString2; + fileName2 += "_prepend.cl"; + + std::string fileName3; + fileName3 = fileName; + fileName3 += "/CLI_prepend.cl"; + + std::ifstream is; + + is.open( + fileName1.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Prepending source file: " + fileName1 + "\n" ); + } + else + { + log( "Prepend source file doesn't exist: " + fileName1 + "\n" ); + + is.clear(); + is.open( + fileName2.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Prepending source file: " + fileName2 + "\n" ); + } + else + { + log( "Prepend source file doesn't exist: " + fileName2 + "\n" ); + + is.clear(); + is.open( + fileName3.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Prepending source file: " + fileName3 + "\n" ); + } + else + { + log( "Prepend source file doesn't exist: " + fileName3 + "\n" ); + } + } + } + + if( is.good() ) + { + // The file exists. Figure out how big it is. 
+ size_t filesize = 0; + + is.seekg(0, std::ios::end); + filesize = (size_t)is.tellg(); + is.seekg(0, std::ios::beg); + + size_t newSize = + filesize + + strlen(singleString) + + 1; // for the null terminator + + char* newSingleString = new char[ newSize ]; + if( newSingleString ) + { + memset( newSingleString, 0, newSize ); + + is.read( newSingleString, filesize ); + + CLI_STRCAT( newSingleString, newSize, singleString ); + + delete [] singleString; + + singleString = newSingleString; + count = 1; + strings = ( const char** )&singleString; + lengths = NULL; + + injected = true; + } + + is.close(); + } + } + + m_OS.LeaveCriticalSection(); + return injected; +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::injectProgramSPIRV( + const uint64_t hash, + size_t& length, + const void*& il, + char*& injectedIL ) +{ + m_OS.EnterCriticalSection(); + + bool injected = false; + + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/Inject"; + } + + // Make two candidate filenames. 
They will have the form: + // CLI___0000.spv, or + // CLI__0000.spv + { + char numberString1[256] = ""; + CLI_SPRINTF( numberString1, 256, "%04u_%08X_0000", + m_ProgramNumber, + (unsigned int)hash ); + + char numberString2[256] = ""; + CLI_SPRINTF( numberString2, 256, "%08X_0000", + (unsigned int)hash ); + + std::string fileName1; + fileName1 = fileName; + fileName1 += "/CLI_"; + fileName1 += numberString1; + fileName1 += ".spv"; + + std::string fileName2; + fileName2 = fileName; + fileName2 += "/CLI_"; + fileName2 += numberString2; + fileName2 += ".spv"; + + std::ifstream is; + + is.open( + fileName1.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injecting SPIR-V file: " + fileName1 + "\n" ); + } + else + { + log( "Injection SPIR-V file doesn't exist: " + fileName1 + "\n" ); + + is.clear(); + is.open( + fileName2.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injecting SPIR-V file: " + fileName2 + "\n" ); + } + else + { + log( "Injection SPIR-V file doesn't exist: " + fileName2 + "\n" ); + } + } + + if( is.good() ) + { + // The file exists. Figure out how big it is. 
+ size_t filesize = 0; + + is.seekg(0, std::ios::end); + filesize = (size_t)is.tellg(); + is.seekg(0, std::ios::beg); + + injectedIL = new char[ filesize ]; + if( injectedIL ) + { + is.read( injectedIL, filesize ); + + il = injectedIL; + length = filesize; + + injected = true; + } + + is.close(); + } + } + + m_OS.LeaveCriticalSection(); + return injected; +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::injectProgramOptions( + const cl_program program, + const char*& options, + char*& newOptions ) +{ + m_OS.EnterCriticalSection(); + + CLI_ASSERT( newOptions == NULL ); + + bool injected = false; + + unsigned int programNumber = m_ProgramNumberMap[ program ]; + uint64_t programHash = m_ProgramHashMap[ program ]; + unsigned int compileCount = m_ProgramNumberCompileCountMap[ programNumber ]; + + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/Inject"; + } + // Make four candidate filenames. 
They will have the form: + // CLI____options.txt, or + // CLI___options.txt, or + // CLI__options.txt, or + // CLI_options.txt + { + char numberString1[256] = ""; + CLI_SPRINTF( numberString1, 256, "%04u_%08X_%04u", + programNumber, + (unsigned int)programHash, + compileCount ); + + char numberString2[256] = ""; + CLI_SPRINTF( numberString2, 256, "%08X_%04u", + (unsigned int)programHash, + compileCount ); + + char numberString3[256] = ""; + CLI_SPRINTF( numberString3, 256, "%08X", + (unsigned int)programHash ); + + std::string fileName1; + fileName1 = fileName; + fileName1 += "/CLI_"; + fileName1 += numberString1; + fileName1 += "_options.txt"; + + std::string fileName2; + fileName2 = fileName; + fileName2 += "/CLI_"; + fileName2 += numberString2; + fileName2 += "_options.txt"; + + std::string fileName3; + fileName3 = fileName; + fileName3 += "/CLI_"; + fileName3 += numberString3; + fileName3 += "_options.txt"; + + std::string fileName4; + fileName4 = fileName; + fileName4 += "/CLI_options.txt"; + + std::ifstream is; + + is.open( + fileName1.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injecting options file: " + fileName1 + "\n" ); + } + else + { + log( "Injection options file doesn't exist: " + fileName1 + "\n" ); + + is.clear(); + is.open( + fileName2.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injecting options file: " + fileName2 + "\n" ); + } + else + { + log( "Injection options file doesn't exist: " + fileName2 + "\n" ); + + is.clear(); + is.open( + fileName3.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injecting options file: " + fileName3 + "\n" ); + } + else + { + log( "Injection options file doesn't exist: " + fileName3 + "\n" ); + + is.clear(); + is.open( + fileName4.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injecting options file: " + fileName4 + "\n" ); + } + else + { + log( "Injection options file doesn't exist: " + 
fileName4 + "\n" ); + } + } + } + } + + if( is.good() ) + { + // The file exists. Figure out how big it is. + size_t filesize = 0; + + is.seekg(0, std::ios::end); + filesize = (size_t)is.tellg(); + is.seekg(0, std::ios::beg); + + newOptions = new char[ filesize + 1 ]; + if( newOptions ) + { + memset( newOptions, 0, filesize + 1 ); + + is.read( newOptions, filesize ); + + options = newOptions; + + injected = true; + } + + is.close(); + } + } + + m_OS.LeaveCriticalSection(); + return injected; +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::appendBuildOptions( + const char*& options, + char*& newOptions ) +{ + m_OS.EnterCriticalSection(); + + bool modified = false; + + if( options == NULL ) + { + // If the options string does not exist, we can simply point it at the + // options we'd like to "append" to it. We don't need to allocate any + // new memory in this case. We also expect that we haven't allocated + // any new options in this case, because if we did, we would have + // pointed the options string to the new options. + + CLI_ASSERT( newOptions == NULL ); + options = config().AppendBuildOptions.c_str(); + + modified = true; + } + else + { + // If the options string does exist, we have two possibilities: + // Either we've already modified the options so we've already + // allocated new options, or we're still working on the application + // provided options. + + size_t newSize = + strlen(options) + + 1 // for a space + + config().AppendBuildOptions.length() + + 1; // for the null terminator + + char* newNewOptions = new char[ newSize ]; + if( newNewOptions ) + { + memset( newNewOptions, 0, newSize ); + + CLI_STRCAT( newNewOptions, newSize, options ); + CLI_STRCAT( newNewOptions, newSize, " " ); + CLI_STRCAT( newNewOptions, newSize, config().AppendBuildOptions.c_str() ); + + // If we have already allocated new options, we can free them + // now. 
+ if( newOptions ) + { + delete [] newOptions; + newOptions = NULL; + } + + // Either way, the new new options are now the new options. + newOptions = newNewOptions; + options = newOptions; + + modified = true; + } + } + + m_OS.LeaveCriticalSection(); + return modified; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpProgramSourceScript( + cl_program program, + const char* singleString ) +{ +#if defined(_WIN32) + + m_OS.EnterCriticalSection(); + + CLI_ASSERT( config().DumpProgramSourceScript || config().SimpleDumpProgramSource ); + + char dirname[MAX_PATH] = ""; + char filename[MAX_PATH] = ""; + char filepath[MAX_PATH] = ""; + + if( config().DumpProgramSourceScript ) + { + size_t remaining = MAX_PATH; + + char date[9] = ""; + char time[9] = ""; + char* curPos = NULL; + char* nextToken = NULL; + char* pch = NULL; + + // Directory: + + curPos = dirname; + remaining = MAX_PATH; + memset( curPos, 0, MAX_PATH ); + + _strdate_s( date, 9 ); + _strtime_s( time, 9 ); + + memcpy_s( curPos, remaining, "CLShaderDump_", 14 ); + curPos += 13; + remaining -= 13; + + memcpy_s( curPos, remaining, strtok_s( date, "/", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, "/", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, "/", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + ::CreateDirectoryA( dirname, NULL ); + + // File: + + curPos = filename; + remaining = MAX_PATH; + memset( curPos, 0, MAX_PATH ); + + if( GetModuleFileNameA( NULL, filename, MAX_PATH-1 ) == 0 ) + { + CLI_ASSERT( 0 ); + strcpy_s( curPos, remaining, "process.exe" ); + } + + pch = strrchr( filename, '\\' ); + pch++; + memcpy_s( curPos, remaining, pch, strlen( pch ) ); + curPos += strlen( pch ) - 4; // -4 to cut off ".exe" + remaining -= strlen( pch ) - 4; + + memcpy_s( curPos, remaining, "_", 2 ); + curPos += 1; + remaining -= 1; + + memcpy_s( 
curPos, remaining, strtok_s( time, ":", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, ":", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, ":", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + CLI_SPRINTF( curPos, remaining, "_%8.8x", m_ProgramNumber ); + curPos += 9; + remaining -= 9; + } + else + { + CLI_SPRINTF( dirname, MAX_PATH, "." ); + CLI_SPRINTF( filename, MAX_PATH, "kernel" ); + } + + CLI_SPRINTF( filepath, MAX_PATH, "%s/%s.%s", dirname, filename, "cl" ); + + if( singleString ) + { + std::ofstream os; + os.open( + filepath, + std::ios::out | std::ios::binary ); + if( os.good() ) + { + os.write( singleString, strlen( singleString ) ); + os.close(); + } + } + + m_ProgramNumberMap[ program ] = m_ProgramNumber; + m_ProgramNumberCompileCountMap[ m_ProgramNumber ] = 0; + m_ProgramNumber++; + + m_OS.LeaveCriticalSection(); + +#else + CLI_ASSERT( 0 ); +#endif +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpProgramSource( + uint64_t hash, + cl_program program, + const char* singleString ) +{ + m_OS.EnterCriticalSection(); + + CLI_ASSERT( config().DumpProgramSource || config().AutoCreateSPIRV ); + + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + } + // Make the filename. It will have the form: + // CLI___source.cl + { + char numberString[256] = ""; + + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( numberString, 256, "%08X", + (unsigned int)hash ); + } + else + { + CLI_SPRINTF( numberString, 256, "%04u_%08X", + m_ProgramNumber, + (unsigned int)hash ); + } + + fileName += "/CLI_"; + fileName += numberString; + fileName += "_source.cl"; + } + // Now make directories as appropriate. + { + OS().MakeDumpDirectories( fileName ); + } + // Dump the program source to a .cl file. 
+ if( singleString ) + { + std::ofstream os; + os.open( + fileName.c_str(), + std::ios::out | std::ios::binary ); + if( os.good() ) + { + log( "Dumping program to file (inject): " + fileName + "\n" ); + + // don't write the null terminator to the file + os.write( singleString, strlen( singleString ) ); + os.close(); + } + } + + m_ProgramNumberMap[ program ] = m_ProgramNumber; + m_ProgramNumberCompileCountMap[ m_ProgramNumber ] = 0; + m_ProgramNumber++; + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpInputProgramBinaries( + uint64_t hash, + const cl_program program, + cl_uint num_devices, + const cl_device_id* device_list, + const size_t* lengths, + const unsigned char** binaries ) +{ + m_OS.EnterCriticalSection(); + + CLI_ASSERT( config().DumpInputProgramBinaries ); + + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + } + + // Make the filename. It will have the form: + // CLI__ + // Leave off the extension for now. + { + char numberString[256] = ""; + + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( numberString, 256, "%08X", + (unsigned int)hash ); + } + else + { + CLI_SPRINTF( numberString, 256, "%04u_%08X", + m_ProgramNumber, + (unsigned int)hash ); + } + + fileName += "/CLI_"; + fileName += numberString; + } + // Now make directories as appropriate. + { + OS().MakeDumpDirectories( fileName ); + } + + for( size_t i = 0; i < num_devices; i++ ) + { + cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT; + + // It's OK if this fails. If it does, it just + // means that our output file won't have a device + // type. 
+ dispatch().clGetDeviceInfo( + device_list[ i ], + CL_DEVICE_TYPE, + sizeof( deviceType ), + &deviceType, + NULL ); + + std::string outputFileName = fileName; + + if( deviceType & CL_DEVICE_TYPE_CPU ) + { + outputFileName += "_CPU"; + } + if( deviceType & CL_DEVICE_TYPE_GPU ) + { + outputFileName += "_GPU"; + } + if( deviceType & CL_DEVICE_TYPE_ACCELERATOR ) + { + outputFileName += "_ACCELERATOR"; + } + if( deviceType & CL_DEVICE_TYPE_CUSTOM ) + { + outputFileName += "_CUSTOM"; + } + + outputFileName += ".bin"; + + std::ofstream os; + os.open( + outputFileName.c_str(), + std::ios::out | std::ios::binary ); + if( os.good() ) + { + log( "Dumping input program binary to file: " + outputFileName + "\n" ); + + os.write( + (const char*)binaries[ i ], + lengths[ i ] ); + os.close(); + } + } + + m_ProgramNumberMap[ program ] = m_ProgramNumber; + m_ProgramNumberCompileCountMap[ m_ProgramNumber ] = 0; + m_ProgramNumber++; + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpProgramSPIRV( + uint64_t hash, + cl_program program, + const size_t length, + const void* il ) +{ + m_OS.EnterCriticalSection(); + + CLI_ASSERT( config().DumpProgramSPIRV ); + + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + } + + // Make the filename. It will have the form: + // CLI___0000.spv + { + char numberString[256] = ""; + + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( numberString, 256, "%08X_0000", + (unsigned int)hash ); + } + else + { + CLI_SPRINTF( numberString, 256, "%04u_%08X_0000", + m_ProgramNumber, + (unsigned int)hash ); + } + + fileName += "/CLI_"; + fileName += numberString; + fileName += ".spv"; + } + + // Now make directories as appropriate. + { + OS().MakeDumpDirectories( fileName ); + } + + // Dump the program source to a .cl file. 
+ { + std::ofstream os; + os.open( + fileName.c_str(), + std::ios::out | std::ios::binary ); + if( os.good() ) + { + log( "Dumping program to file (inject): " + fileName + "\n" ); + + os.write( (const char*)il, length ); + os.close(); + + // Optionally, run spirv-dis to disassemble the generated module. + if( !config().SPIRVDis.empty() ) + { + std::string command = + config().SPIRVDis + + " -o " + fileName + "t" + + " " + fileName; + + logf( "Running: %s\n", command.c_str() ); + OS().ExecuteCommand( command ); + } + } + } + + m_ProgramNumberMap[ program ] = m_ProgramNumber; + m_ProgramNumberCompileCountMap[ m_ProgramNumber ] = 0; + m_ProgramNumber++; + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpProgramOptionsScript( + const cl_program program, + const char* options ) +{ +#if defined(_WIN32) + + m_OS.EnterCriticalSection(); + + CLI_ASSERT( config().DumpProgramSource || config().SimpleDumpProgramSource ); + + unsigned int programNumber = m_ProgramNumberMap[ program ]; + + if( options ) + { + char dirname[MAX_PATH] = ""; + char filename[MAX_PATH] = ""; + char filepath[MAX_PATH] = ""; + + if( config().DumpProgramSourceScript ) + { + size_t remaining = MAX_PATH; + + char date[9] = ""; + char time[9] = ""; + char* curPos = NULL; + char* nextToken = NULL; + char* pch = NULL; + + // Directory: + + curPos = dirname; + remaining = MAX_PATH; + memset( curPos, 0, MAX_PATH ); + + _strdate_s( date, 9 ); + _strtime_s( time, 9 ); + + memcpy_s( curPos, remaining, "CLShaderDump_", 14 ); + curPos += 13; + remaining -= 13; + + memcpy_s( curPos, remaining, strtok_s( date, "/", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, "/", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, "/", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + ::CreateDirectoryA( dirname, NULL ); + + // 
File: + + curPos = filename; + remaining = MAX_PATH; + memset( curPos, 0, MAX_PATH ); + + if( GetModuleFileNameA( NULL, filename, MAX_PATH-1 ) == 0 ) + { + CLI_ASSERT( 0 ); + strcpy_s( curPos, remaining, "process.exe" ); + } + + // Skip past the last path separator, if there is one. The + // fallback name above contains no backslash; in that case + // strrchr() returns NULL and the whole string is the name. + pch = strrchr( filename, '\\' ); + pch = ( pch != NULL ) ? pch + 1 : filename; + + // The source and the destination both live in filename, so the + // two ranges may overlap and the copy must use memmove_s. + size_t nameLength = strlen( pch ); + memmove_s( curPos, remaining, pch, nameLength ); + if( nameLength >= 4 ) + { + nameLength -= 4; // -4 to cut off ".exe" + } + curPos += nameLength; + remaining -= nameLength; + + memcpy_s( curPos, remaining, "_", 2 ); + curPos += 1; + remaining -= 1; + + memcpy_s( curPos, remaining, strtok_s( time, ":", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, ":", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + memcpy_s( curPos, remaining, strtok_s( NULL, ":", &nextToken ), 2 ); + curPos += 2; + remaining -= 2; + + CLI_SPRINTF( curPos, remaining, "_%8.8x", programNumber ); + curPos += 9; + remaining -= 9; + } + else + { + CLI_SPRINTF( dirname, MAX_PATH, "." ); + CLI_SPRINTF( filename, MAX_PATH, "kernel" ); + } + + CLI_SPRINTF( filepath, MAX_PATH, "%s/%s.%s", dirname, filename, "txt" ); + + std::ofstream os; + os.open( + filepath, + std::ios::out | std::ios::binary ); + if( os.good() ) + { + os.write( options, strlen( options ) ); + os.close(); + } + } + + m_OS.LeaveCriticalSection(); + +#else + CLI_ASSERT( 0 ); +#endif +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpProgramOptions( + const cl_program program, + const char* options ) +{ + m_OS.EnterCriticalSection(); + + CLI_ASSERT( config().DumpProgramSource || config().DumpProgramBinaries || config().DumpProgramSPIRV ); + + unsigned int programNumber = m_ProgramNumberMap[ program ]; + uint64_t programHash = m_ProgramHashMap[ program ]; + unsigned int compileCount = m_ProgramNumberCompileCountMap[ programNumber ]; + + if( options ) + { + std::string fileName; + + // Get the dump directory name.
+ { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + } + // Make the filename. It will have the form: + // CLI___ + // Leave off the extension for now. + { + char numberString[256] = ""; + + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( numberString, 256, "%08X_%04u", + (unsigned int)programHash, + compileCount ); + } + else + { + CLI_SPRINTF( numberString, 256, "%04u_%08X_%04u", + programNumber, + (unsigned int)programHash, + compileCount ); + } + + fileName += "/CLI_"; + fileName += numberString; + } + // Dump the program source to a .txt file. + { + fileName += "_options.txt"; + std::ofstream os; + os.open( + fileName.c_str(), + std::ios::out | std::ios::binary ); + if( os.good() ) + { + log( "Dumping program options to file (inject): " + fileName + "\n" ); + + // don't write the null terminator to the file + os.write( options, strlen( options) ); + os.close(); + } + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpProgramBuildLog( + const cl_program program, + const cl_device_id device, + const char* buildLog, + const size_t buildLogSize ) +{ + // We're already in a critical section when we get here, so we don't need to + // grab the critical section again. + + CLI_ASSERT( config().DumpProgramBuildLogs ); + CLI_ASSERT( buildLog ); + + unsigned int programNumber = m_ProgramNumberMap[ program ]; + uint64_t programHash = m_ProgramHashMap[ program ]; + unsigned int compileCount = m_ProgramNumberCompileCountMap[ programNumber ]; + + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + } + // Make the filename. It will have the form: + // CLI___ + // Leave off the extension for now. 
+ { + char numberString[256] = ""; + + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( numberString, 256, "%08X_%04u", + (unsigned int)programHash, + compileCount ); + } + else + { + CLI_SPRINTF( numberString, 256, "%04u_%08X_%04u", + programNumber, + (unsigned int)programHash, + compileCount ); + } + + fileName += "/CLI_"; + fileName += numberString; + } + // Now make directories as appropriate. + { + OS().MakeDumpDirectories( fileName ); + } + + cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT; + + // It's OK if this fails. If it does, it just + // means that our output file won't have a device + // type. + dispatch().clGetDeviceInfo( + device, + CL_DEVICE_TYPE, + sizeof( deviceType ), + &deviceType, + NULL ); + + if( deviceType & CL_DEVICE_TYPE_CPU ) + { + fileName += "_CPU"; + } + if( deviceType & CL_DEVICE_TYPE_GPU ) + { + fileName += "_GPU"; + } + if( deviceType & CL_DEVICE_TYPE_ACCELERATOR ) + { + fileName += "_ACCELERATOR"; + } + if( deviceType & CL_DEVICE_TYPE_CUSTOM ) + { + fileName += "_CUSTOM"; + } + + fileName += "_build_log.txt"; + + std::ofstream os; + os.open( + fileName.c_str(), + std::ios::out | std::ios::binary ); + if( os.good() ) + { + log( "Dumping build log to file: " + fileName + "\n" ); + + os.write( + buildLog, + buildLogSize ); + os.close(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::updateHostTimingStats( + const std::string& functionName, + cl_kernel kernel, + uint64_t start, + uint64_t end ) +{ + m_OS.EnterCriticalSection(); + + std::string key( functionName ); + if( kernel ) + { + const std::string& kernelName = m_KernelNameMap[ kernel ]; + key += "( "; + key += kernelName; + key += " )"; + } + + SCpuTimingStats* pCpuTimingStats = m_CpuTimingStatsMap[ key ]; + if( pCpuTimingStats == NULL ) + { + pCpuTimingStats = new SCpuTimingStats; + if( pCpuTimingStats == NULL ) + { + // Memory allocation failure. 
+ } + else + { + pCpuTimingStats->NumberOfCalls = 0; + pCpuTimingStats->TotalTicks = 0; + pCpuTimingStats->MinTicks = UINT_MAX; + pCpuTimingStats->MaxTicks = 0; + + m_CpuTimingStatsMap[ key ] = pCpuTimingStats; + } + } + + uint64_t numberOfCalls = 0; + uint64_t tickDelta = end - start; + + if( pCpuTimingStats != NULL ) + { + pCpuTimingStats->NumberOfCalls++; + pCpuTimingStats->TotalTicks += tickDelta; + pCpuTimingStats->MinTicks = std::min< uint64_t >( pCpuTimingStats->MinTicks, tickDelta ); + pCpuTimingStats->MaxTicks = std::max< uint64_t >( pCpuTimingStats->MaxTicks, tickDelta ); + + numberOfCalls = pCpuTimingStats->NumberOfCalls; + } + + if( config().HostPerformanceTimeLogging ) + { + uint64_t nsDelta = OS().TickToNS( tickDelta ); + logf( "Host Time for call %u: %s = %u\n", + (unsigned int)numberOfCalls, + key.c_str(), + (unsigned int)nsDelta ); + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::modifyCommandQueueProperties( + cl_command_queue_properties& props ) const +{ + if( config().DevicePerformanceTiming || + config().ITTPerformanceTiming || + config().ChromePerformanceTiming || + config().SIMDSurvey || + !config().DevicePerfCounterCustom.empty() ) + { + props |= (cl_command_queue_properties)CL_QUEUE_PROFILING_ENABLE; + } + if( config().InOrderQueue ) + { + props &= ~(cl_command_queue_properties)CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::createCommandQueueOverrideInit( + const cl_queue_properties* properties, + cl_queue_properties*& pLocalQueueProperties ) const +{ + // We want to add command queue properties, unless command queue + // properties already exist (requesting the same property twice is an + // error). So, look through the queue properties for the command queue + // properties enum. 
We need to do this anyways to count the number of + // property pairs. + bool foundCommandQueuePropertiesEnum = false; + int numProperties = 0; + if( properties ) + { + while( properties[ numProperties ] != 0 ) + { + if( properties[ numProperties ] == CL_QUEUE_PROPERTIES ) + { + foundCommandQueuePropertiesEnum = true; + } + numProperties += 2; + } + } + + if( foundCommandQueuePropertiesEnum == false ) + { + // The CL_QUEUE_PROPERTIES pair isn't already present, so we'll + // need to allocate an extra pair of properties for it. + numProperties += 2; + } + + // Allocate a new array of properties. We need to allocate two + // properties for each pair, plus one property for the terminating + // zero. + pLocalQueueProperties = new cl_queue_properties[ numProperties + 1 ]; + if( pLocalQueueProperties ) + { + // Copy the old properties array to the new properties array, + // if the old properties array exists. + numProperties = 0; + if( properties ) + { + while( properties[ numProperties ] != 0 ) + { + pLocalQueueProperties[ numProperties ] = properties[ numProperties ]; + if( properties[ numProperties ] == CL_QUEUE_PROPERTIES ) + { + CLI_ASSERT( foundCommandQueuePropertiesEnum ); + + cl_command_queue_properties props = properties[ numProperties + 1 ]; + + modifyCommandQueueProperties( props ); + + pLocalQueueProperties[ numProperties + 1 ] = props; + } + else + { + pLocalQueueProperties[ numProperties + 1 ] = + properties[ numProperties + 1 ]; + } + numProperties += 2; + } + } + // Add command queue properties if they aren't already set. + if( foundCommandQueuePropertiesEnum == false ) + { + cl_command_queue_properties props = 0; + + modifyCommandQueueProperties( props ); + + pLocalQueueProperties[ numProperties ] = CL_QUEUE_PROPERTIES; + pLocalQueueProperties[ numProperties + 1 ] = props; + numProperties += 2; + } + // Add the terminating zero.
+ pLocalQueueProperties[ numProperties ] = 0; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// Frees a local queue properties array and resets the caller's pointer to +// NULL. The array is expected to be the one allocated with new[] by +// createCommandQueueOverrideInit(), so it must be released with delete []. +// +void CLIntercept::createCommandQueueOverrideCleanup( + cl_queue_properties*& pLocalQueueProperties ) const +{ + if( pLocalQueueProperties ) + { + delete [] pLocalQueueProperties; + pLocalQueueProperties = NULL; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::addTimingEvent( + const std::string& functionName, + const uint64_t queuedTime, + const cl_kernel kernel, + const cl_uint workDim, + const size_t* gws, + const size_t* lws, + cl_event event ) +{ + m_OS.EnterCriticalSection(); + + SEventListNode* pNode = new SEventListNode; + if( pNode ) + { + pNode->FunctionName = functionName; + if( kernel ) + { + pNode->KernelName = m_KernelNameMap[ kernel ]; + + if( config().DevicePerformanceTimeHashTracking ) + { + cl_program program = NULL; + dispatch().clGetKernelInfo( + kernel, + CL_KERNEL_PROGRAM, + sizeof(program), + &program, + NULL ); + if( program ) + { + unsigned int programNumber = m_ProgramNumberMap[ program ]; + uint64_t programHash = m_ProgramHashMap[ program ]; + unsigned int compileCount = m_ProgramNumberCompileCountMap[ programNumber ]; + + char hashString[256] = ""; + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( hashString, 256, "(%08X_%04u)", + (unsigned int)programHash, + compileCount ); + } + else + { + CLI_SPRINTF( hashString, 256, "(%04u_%08X_%04u)", + programNumber, + (unsigned int)programHash, + compileCount ); + } + pNode->KernelName += hashString; + } + } + + if( config().DevicePerformanceTimeKernelInfoTracking ) + { + cl_command_queue queue = NULL; + dispatch().clGetEventInfo( + event, + CL_EVENT_COMMAND_QUEUE, + sizeof(queue), + &queue, + NULL ); + if( queue ) + { + cl_device_id device = NULL; + dispatch().clGetCommandQueueInfo( + queue, + CL_QUEUE_DEVICE, + sizeof(device), + &device, + NULL ); + if( device ) + { + std::ostringstream ss; +
{ + size_t pwgsm = 0; + dispatch().clGetKernelWorkGroupInfo( + kernel, + device, + CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + sizeof(pwgsm), + &pwgsm, + NULL ); + if( pwgsm ) + { + ss << " SIMD" << (unsigned int)pwgsm; + } + } + { + cl_ulong slm = 0; + dispatch().clGetKernelWorkGroupInfo( + kernel, + device, + CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(slm), + &slm, + NULL ); + if( slm ) + { + ss << " SLM=" << (unsigned int)slm; + } + } + { + cl_ulong tpm = 0; + dispatch().clGetKernelWorkGroupInfo( + kernel, + device, + CL_KERNEL_PRIVATE_MEM_SIZE, + sizeof(tpm), + &tpm, + NULL ); + if( tpm ) + { + ss << " TPM=" << (unsigned int)tpm; + } + } + { + cl_ulong spill = 0; + dispatch().clGetKernelWorkGroupInfo( + kernel, + device, + CL_KERNEL_SPILL_MEM_SIZE_INTEL, + sizeof(spill), + &spill, + NULL ); + if( spill ) + { + ss << " SPILL=" << (unsigned int)spill; + } + } + pNode->KernelName += ss.str(); + } + } + } + + if( config().DevicePerformanceTimeGWSTracking && gws ) + { + std::ostringstream ss; + ss << " GWS[ "; + if( workDim >= 1 ) + { + ss << gws[0]; + } + if( workDim >= 2 ) + { + ss << " x " << gws[1]; + } + if( workDim >= 3 ) + { + ss << " x " << gws[2]; + } + ss << " ]"; + pNode->KernelName += ss.str(); + } + + if( config().DevicePerformanceTimeLWSTracking ) + { + std::ostringstream ss; + ss << " LWS[ "; + if( lws ) + { + if( workDim >= 1 ) + { + ss << lws[0]; + } + if( workDim >= 2 ) + { + ss << " x " << lws[1]; + } + if( workDim >= 3 ) + { + ss << " x " << lws[2]; + } + } + else + { + ss << "NULL"; + } + ss << " ]"; + pNode->KernelName += ss.str(); + } + } + pNode->QueuedTime = queuedTime; + pNode->Kernel = kernel; // Note: no retain, so cannot count on this value... 
+ pNode->Event = event; + + m_EventList.push_back( pNode ); + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::checkTimingEvents() +{ + m_OS.EnterCriticalSection(); + + CEventList::iterator current = m_EventList.begin(); + CEventList::iterator next; + + while( current != m_EventList.end() ) + { + cl_int errorCode = CL_SUCCESS; + cl_int eventStatus = 0; + + next = current; + ++next; + + SEventListNode* pNode = *current; + + errorCode = dispatch().clGetEventInfo( + pNode->Event, + CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof( eventStatus ), + &eventStatus, + NULL ); + + switch( errorCode ) + { + case CL_SUCCESS: + if( eventStatus == CL_COMPLETE ) + { + if( config().DevicePerformanceTiming || + config().ITTPerformanceTiming || + config().ChromePerformanceTiming || + config().SIMDSurvey ) + { + cl_ulong commandQueued = 0; + cl_ulong commandSubmit = 0; + cl_ulong commandStart = 0; + cl_ulong commandEnd = 0; + + uint64_t numberOfCalls = 0; + + errorCode |= dispatch().clGetEventProfilingInfo( + pNode->Event, + CL_PROFILING_COMMAND_QUEUED, + sizeof( commandQueued ), + &commandQueued, + NULL ); + errorCode |= dispatch().clGetEventProfilingInfo( + pNode->Event, + CL_PROFILING_COMMAND_SUBMIT, + sizeof( commandSubmit ), + &commandSubmit, + NULL ); + errorCode |= dispatch().clGetEventProfilingInfo( + pNode->Event, + CL_PROFILING_COMMAND_START, + sizeof( commandStart ), + &commandStart, + NULL ); + errorCode |= dispatch().clGetEventProfilingInfo( + pNode->Event, + CL_PROFILING_COMMAND_END, + sizeof( commandEnd ), + &commandEnd, + NULL ); + if( errorCode == CL_SUCCESS ) + { + cl_ulong delta = commandEnd - commandStart; + + const std::string& key = + pNode->KernelName.empty() ? 
+ pNode->FunctionName : + pNode->KernelName; + + SDeviceTimingStats* pDeviceTimingStats = m_DeviceTimingStatsMap[ key ]; + if( pDeviceTimingStats == NULL ) + { + pDeviceTimingStats = new SDeviceTimingStats; + if( pDeviceTimingStats == NULL ) + { + // Memory allocation failure. + } + else + { + pDeviceTimingStats->NumberOfCalls = 0; + pDeviceTimingStats->TotalNS = 0; + pDeviceTimingStats->MinNS = CL_ULONG_MAX; + pDeviceTimingStats->MaxNS = 0; + + m_DeviceTimingStatsMap[ key ] = pDeviceTimingStats; + } + } + + if( pDeviceTimingStats != NULL ) + { + pDeviceTimingStats->NumberOfCalls++; + pDeviceTimingStats->TotalNS += delta; + pDeviceTimingStats->MinNS = std::min< cl_ulong >( pDeviceTimingStats->MinNS, delta ); + pDeviceTimingStats->MaxNS = std::max< cl_ulong >( pDeviceTimingStats->MaxNS, delta ); + + numberOfCalls = pDeviceTimingStats->NumberOfCalls; + } + + if( config().DevicePerformanceTimeLogging ) + { + cl_ulong queuedDelta = commandSubmit - commandQueued; + cl_ulong submitDelta = commandStart - commandSubmit; + + logf( "Device Time for call %u to %s = %u ns (queued -> submit), %u ns (submit -> start), %u ns (start -> end)\n", + (cl_uint)numberOfCalls, + key.c_str(), + (cl_uint)queuedDelta, + (cl_uint)submitDelta, + (cl_uint)delta ); + } + + if( config().DevicePerformanceTimelineLogging ) + { + logf( "Device Timeline for call %u to %s = %lu ns (queued), %lu ns (submit), %lu ns (start), %lu ns (end)\n", + (cl_uint)numberOfCalls, + key.c_str(), + commandQueued, + commandSubmit, + commandStart, + commandEnd ); + } + + if( config().SIMDSurvey && + pNode->Kernel ) + { + SSIMDSurveyKernel* pSIMDSurveyKernel = + m_SIMDSurveyKernelMap[ pNode->Kernel ]; + if( pSIMDSurveyKernel ) + { + if( pNode->Kernel == pSIMDSurveyKernel->SIMD8Kernel && + pSIMDSurveyKernel->SIMD8ExecutionTimeNS > delta ) + { + pSIMDSurveyKernel->SIMD8ExecutionTimeNS = delta; + logf( "SIMD Survey: Results: New min SIMD8 Time for kernel %s is: %lu\n", + pNode->KernelName.c_str(), + 
pSIMDSurveyKernel->SIMD8ExecutionTimeNS ); + } + if( pNode->Kernel == pSIMDSurveyKernel->SIMD16Kernel && + pSIMDSurveyKernel->SIMD16ExecutionTimeNS > delta ) + { + pSIMDSurveyKernel->SIMD16ExecutionTimeNS = delta; + logf( "SIMD Survey: Results: New min SIMD16 Time for kernel %s is: %lu\n", + pNode->KernelName.c_str(), + pSIMDSurveyKernel->SIMD16ExecutionTimeNS ); + } + if( pNode->Kernel == pSIMDSurveyKernel->SIMD32Kernel && + pSIMDSurveyKernel->SIMD32ExecutionTimeNS > delta ) + { + pSIMDSurveyKernel->SIMD32ExecutionTimeNS = delta; + logf( "SIMD Survey: Results: New min SIMD32 Time for kernel %s is: %lu\n", + pNode->KernelName.c_str(), + pSIMDSurveyKernel->SIMD32ExecutionTimeNS ); + } + if( pNode->Kernel != pSIMDSurveyKernel->SIMD8Kernel && + pNode->Kernel != pSIMDSurveyKernel->SIMD16Kernel && + pNode->Kernel != pSIMDSurveyKernel->SIMD32Kernel ) + { + logf( "SIMD Survey: Results: Default Time for kernel %s is: %lu\n", + pNode->KernelName.c_str(), + delta ); + } + } + else + { + logf( "SIMD Survey: Results: Don't have any information kernel %p!?!?\n", + pNode->Kernel ); + } + } + } + } + +#if defined(USE_ITT) + if( config().ITTPerformanceTiming ) + { + const std::string& name = + pNode->KernelName.empty() ? + pNode->FunctionName : + pNode->KernelName; + + ittTraceEvent( + name, + pNode->Event, + pNode->QueuedTime ); + } +#endif + + if( config().ChromePerformanceTiming ) + { + const std::string& name = + pNode->KernelName.empty() ? + pNode->FunctionName : + pNode->KernelName; + + chromeTraceEvent( + name, + pNode->Event, + pNode->QueuedTime ); + } + +#if defined(USE_MDAPI) + if( !config().DevicePerfCounterCustom.empty() ) + { + const std::string& name = + pNode->KernelName.empty() ? + pNode->FunctionName : + pNode->KernelName; + + saveMDAPICounters( + name, + pNode->Event ); + } +#endif + + dispatch().clReleaseEvent( pNode->Event ); + delete pNode; + + m_EventList.erase( current ); + } + break; + case CL_INVALID_EVENT: + { + // This is unexpected. 
We retained the event when we + // added it to the list. Remove the event from the + // list. + logf( "Unexpectedly got CL_INVALID_EVENT for an event from %s!\n", + pNode->FunctionName.c_str() ); + + delete pNode; + + m_EventList.erase( current ); + } + break; + default: + // nothing + break; + } + + current = next; + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::addKernelName( + cl_kernel kernel, + const std::string& kernelName ) +{ + m_OS.EnterCriticalSection(); + + m_KernelNameMap[ kernel ] = kernelName; + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::addKernelNames( + cl_kernel* kernels, + cl_uint numKernels ) +{ + m_OS.EnterCriticalSection(); + + while( numKernels-- ) + { + cl_kernel kernel = kernels[ numKernels ]; + char* kernelName = NULL; + size_t kernelNameSize = 0; + cl_int errorCode = CL_SUCCESS; + + errorCode = dispatch().clGetKernelInfo( + kernel, + CL_KERNEL_FUNCTION_NAME, + 0, + NULL, + &kernelNameSize ); + if( errorCode == CL_SUCCESS ) + { + kernelName = new char[ kernelNameSize + 1 ]; + if( kernelName ) + { + errorCode = dispatch().clGetKernelInfo( + kernel, + CL_KERNEL_FUNCTION_NAME, + kernelNameSize, + kernelName, + NULL ); + if( errorCode == CL_SUCCESS ) + { + kernelName[ kernelNameSize ] = 0; + m_KernelNameMap[ kernel ] = kernelName; + } + + delete [] kernelName; + } + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::removeKernel( + cl_kernel kernel ) +{ + m_OS.EnterCriticalSection(); + + cl_uint refCount = 0; + cl_int errorCode = CL_SUCCESS; + + errorCode = dispatch().clGetKernelInfo( + kernel, + CL_KERNEL_REFERENCE_COUNT, + sizeof( refCount ), + &refCount, + NULL ); + if( errorCode == CL_SUCCESS ) + { + if( refCount == 1 ) + { + m_KernelNameMap.erase( 
kernel ); + + SSIMDSurveyKernel* pSIMDSurveyKernel = + m_SIMDSurveyKernelMap[ kernel ]; + if( pSIMDSurveyKernel ) + { + errorCode = dispatch().clReleaseKernel( pSIMDSurveyKernel->SIMD8Kernel ); + errorCode = dispatch().clReleaseKernel( pSIMDSurveyKernel->SIMD16Kernel ); + errorCode = dispatch().clReleaseKernel( pSIMDSurveyKernel->SIMD32Kernel ); + + // Remove the parent kernel and each of the child kernels from the map. + m_SIMDSurveyKernelMap.erase( kernel ); + + m_SIMDSurveyKernelMap.erase( pSIMDSurveyKernel->SIMD8Kernel ); + m_SIMDSurveyKernelMap.erase( pSIMDSurveyKernel->SIMD16Kernel ); + m_SIMDSurveyKernelMap.erase( pSIMDSurveyKernel->SIMD32Kernel ); + + // Also clean up the kernel name map. + m_KernelNameMap.erase( pSIMDSurveyKernel->SIMD8Kernel ); + m_KernelNameMap.erase( pSIMDSurveyKernel->SIMD16Kernel ); + m_KernelNameMap.erase( pSIMDSurveyKernel->SIMD32Kernel ); + + // Done! + delete pSIMDSurveyKernel; + pSIMDSurveyKernel = NULL; + } + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::addBuffer( + cl_mem buffer ) +{ + if( buffer ) + { + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + size_t size = 0; + + errorCode |= dispatch().clGetMemObjectInfo( + buffer, + CL_MEM_SIZE, + sizeof( size_t ), + &size, + NULL ); + + if( errorCode == CL_SUCCESS ) + { + m_MemAllocNumberMap[ buffer ] = m_MemAllocNumber; + m_BufferInfoMap[ buffer ] = size; + m_MemAllocNumber++; + } + + m_OS.LeaveCriticalSection(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::addSampler( + cl_sampler sampler, + const std::string& str ) +{ + if( sampler ) + { + m_OS.EnterCriticalSection(); + m_SamplerDataMap[sampler] = str; + m_OS.LeaveCriticalSection(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::removeSampler( + cl_sampler sampler ) +{ + if( 
sampler ) + { + m_OS.EnterCriticalSection(); + + CSamplerDataMap::iterator iter = m_SamplerDataMap.find( sampler ); + if( iter != m_SamplerDataMap.end() ) + { + m_SamplerDataMap.erase( iter ); + } + + m_OS.LeaveCriticalSection(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::getSampler( + size_t size, + const void *arg_value, + std::string& str ) const +{ + bool found = false; + + if( ( arg_value != NULL ) && ( size == sizeof( cl_sampler ) ) ) + { + const cl_sampler sampler = *(const cl_sampler *)arg_value; + + CSamplerDataMap::const_iterator iter = m_SamplerDataMap.find( sampler ); + if( iter != m_SamplerDataMap.end() ) + { + str = iter->second; + found = true; + } + } + + return found; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpArgument( + cl_kernel kernel, + cl_int arg_index, + size_t size, + const void *pBuffer ) +{ + if ( kernel ) + { + m_OS.EnterCriticalSection(); + + std::string fileName = ""; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/SetKernelArg/"; + } + + // Now make directories as appropriate. + { + OS().MakeDumpDirectories( fileName ); + } + + // Add the enqueue count to file name + { + char enqueueCount[ MAX_PATH ]; + + CLI_SPRINTF( enqueueCount, MAX_PATH, "%04u", + (unsigned int)m_EnqueueCounter ); + fileName += "SetKernelArg_"; + fileName += enqueueCount; + } + + // Add the kernel name to the filename + { + fileName += "_Kernel_"; + fileName += m_KernelNameMap[ kernel ]; + } + + // Add the arg number to the file name + { + char argName[ MAX_PATH ]; + + CLI_SPRINTF( argName, MAX_PATH, "%d", arg_index ); + + fileName += "_Arg_"; + fileName += argName; + } + + // Add extension to file name + { + fileName += ".bin"; + } + + // Dump the buffer contents to the file. 
+ { + if( pBuffer != NULL) + { + std::ofstream os; + os.open( + fileName.c_str(), + std::ios_base::out | std::ios_base::binary ); + + if( os.good() ) + { + os.write( (const char *)pBuffer, size ); + os.close(); + } + } + } + + m_OS.LeaveCriticalSection(); + } +} +/////////////////////////////////////////////////////////////////////////////// +// +// Records a newly created image: queries its width, height, depth, array +// size and element size, converts them into a three-element region, and +// stores the result in m_ImageInfoMap along with a new allocation number. +// Nothing is recorded if image is NULL or if any of the queries fail. +// +void CLIntercept::addImage( + cl_mem image ) +{ + if( image ) + { + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + size_t width = 0; + size_t height = 0; + size_t depth = 0; + size_t arraySize = 0; + size_t elementSize = 0; + + errorCode |= dispatch().clGetImageInfo( + image, + CL_IMAGE_WIDTH, + sizeof(width), + &width, + NULL ); + errorCode |= dispatch().clGetImageInfo( + image, + CL_IMAGE_HEIGHT, + sizeof(height), + &height, + NULL ); + errorCode |= dispatch().clGetImageInfo( + image, + CL_IMAGE_DEPTH, + sizeof(depth), + &depth, + NULL ); + errorCode |= dispatch().clGetImageInfo( + image, + CL_IMAGE_ARRAY_SIZE, + sizeof(arraySize), + &arraySize, + NULL ); + errorCode |= dispatch().clGetImageInfo( + image, + CL_IMAGE_ELEMENT_SIZE, + sizeof(elementSize), + &elementSize, + NULL ); + + if( errorCode == CL_SUCCESS ) + { + SImageInfo imageInfo; + + imageInfo.Region[0] = width; + if( height == 0 ) + { + if( arraySize == 0 ) + { + imageInfo.Region[1] = 1; // 1D image + } + else + { + imageInfo.Region[1] = arraySize; // 1D image array + } + } + else + { + imageInfo.Region[1] = height; // 2D image, 2D image array, or 3D image + } + + if( depth == 0 ) + { + // A 1D image array already has its layer count in Region[1], + // so only a 2D image array puts the layer count in Region[2]. + if( arraySize == 0 || height == 0 ) + { + imageInfo.Region[2] = 1; // 1D image, 1D image array, or 2D image + } + else + { + imageInfo.Region[2] = arraySize; // 2D image array + } + } + else + { + imageInfo.Region[2] = depth; // 3D image + } + + imageInfo.ElementSize = elementSize; + + m_MemAllocNumberMap[ image ] = m_MemAllocNumber; + m_ImageInfoMap[ image ] = imageInfo; + m_MemAllocNumber++; + } + + m_OS.LeaveCriticalSection(); + } +} +
+/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::removeMemObj( + cl_mem memobj ) +{ + m_OS.EnterCriticalSection(); + + cl_uint refCount = 0; + cl_int errorCode = CL_SUCCESS; + + errorCode = dispatch().clGetMemObjectInfo( + memobj, + CL_MEM_REFERENCE_COUNT, + sizeof( refCount ), + &refCount, + NULL ); + if( errorCode == CL_SUCCESS ) + { + if( refCount == 1 ) + { + m_MemAllocNumberMap.erase( memobj ); + m_BufferInfoMap.erase( memobj ); + m_ImageInfoMap.erase( memobj ); + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::addSVMAllocation( + void* svmPtr, + size_t size ) +{ + if( svmPtr ) + { + m_OS.EnterCriticalSection(); + + m_MemAllocNumberMap[ svmPtr ] = m_MemAllocNumber; + m_SVMAllocInfoMap[ svmPtr ] = size; + m_MemAllocNumber++; + + m_OS.LeaveCriticalSection(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::removeSVMAllocation( + void* svmPtr ) +{ + m_OS.EnterCriticalSection(); + + m_MemAllocNumberMap.erase( svmPtr ); + m_SVMAllocInfoMap.erase( svmPtr ); + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::setKernelArg( + cl_kernel kernel, + cl_uint arg_index, + cl_mem memobj ) +{ + m_OS.EnterCriticalSection(); + + if( m_MemAllocNumberMap.find( memobj ) != m_MemAllocNumberMap.end() ) + { + CKernelArgMemMap& kernelArgMap = m_KernelArgMap[ kernel ]; + kernelArgMap[ arg_index ] = memobj; + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::setKernelArgSVMPointer( + cl_kernel kernel, + cl_uint arg_index, + const void* arg ) +{ + m_OS.EnterCriticalSection(); + + // Unlike clSetKernelArg(), which must pass a cl_mem, clSetKernelArgSVMPointer + // can pass a pointer to the base 
of a SVM allocation or anywhere inside of + // an SVM allocation. As a result, we may need to search the SVM map to find + // the base address and size of the SVM allocation. Still, try to just lookup + // the SVM allocation in the map, just in case the app sets the base address + // (this may be the common case?). + + CKernelArgMemMap& kernelArgMap = m_KernelArgMap[ kernel ]; + + if( m_SVMAllocInfoMap.find( arg ) != m_SVMAllocInfoMap.end() ) + { + // Got it, the pointer was the base address of an SVM allocation. + kernelArgMap[ arg_index ] = arg; + } + else + { + intptr_t iarg = (intptr_t)arg; + for( CSVMAllocInfoMap::iterator i = m_SVMAllocInfoMap.begin(); + i != m_SVMAllocInfoMap.end(); + ++i ) + { + const void* ptr = (*i).first; + size_t size = (*i).second; + + intptr_t start = (intptr_t)ptr; + intptr_t end = start + size; + if( start <= iarg && + iarg < end ) + { + kernelArgMap[ arg_index ] = ptr; + break; + } + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpBuffersForKernel( + const std::string& name, + cl_kernel kernel, + cl_command_queue command_queue ) +{ + m_OS.EnterCriticalSection(); + + std::string fileNamePrefix = ""; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileNamePrefix ); + fileNamePrefix += "/memDump"; + fileNamePrefix += name; + fileNamePrefix += "Enqueue/"; + } + + // Now make directories as appropriate. 
+ { + OS().MakeDumpDirectories( fileNamePrefix ); + } + + CKernelArgMemMap& kernelArgMemMap = m_KernelArgMap[ kernel ]; + CKernelArgMemMap::iterator i = kernelArgMemMap.begin(); + while( i != kernelArgMemMap.end() ) + { + CLI_C_ASSERT( sizeof(void*) == sizeof(cl_mem) ); + cl_uint arg_index = (*i).first; + void* allocation = (void*)(*i).second; + cl_mem memobj = (cl_mem)allocation; + ++i; + if( ( m_SVMAllocInfoMap.find( allocation ) != m_SVMAllocInfoMap.end() ) || + ( m_BufferInfoMap.find( memobj ) != m_BufferInfoMap.end() ) ) + { + unsigned int number = m_MemAllocNumberMap[ memobj ]; + + std::string fileName = fileNamePrefix; + char tmpStr[ MAX_PATH ]; + + // Add the enqueue count to file name + { + CLI_SPRINTF( tmpStr, MAX_PATH, "%04u", + (unsigned int)m_EnqueueCounter ); + + fileName += "Enqueue_"; + fileName += tmpStr; + } + + // Add the kernel name to the filename + { + fileName += "_Kernel_"; + fileName += m_KernelNameMap[ kernel ]; + } + + // Add the arg number to the file name + { + CLI_SPRINTF( tmpStr, MAX_PATH, "%u", arg_index ); + + fileName += "_Arg_"; + fileName += tmpStr; + } + + // Add the buffer number to the file name + { + CLI_SPRINTF( tmpStr, MAX_PATH, "%04u", number ); + + fileName += "_Buffer_"; + fileName += tmpStr; + } + + // Add extension to file name + { + fileName += ".bin"; + } + + // Dump the buffer contents to the file. 
+ if( m_SVMAllocInfoMap.find( allocation ) != m_SVMAllocInfoMap.end() ) + { + size_t size = m_SVMAllocInfoMap[ allocation ]; + + cl_int error = dispatch().clEnqueueSVMMap( + command_queue, + CL_TRUE, + CL_MAP_READ, + allocation, + size, + 0, + NULL, + NULL ); + if( error == CL_SUCCESS ) + { + std::ofstream os; + os.open( + fileName.c_str(), + std::ios::out | std::ios::binary ); + + if( os.good() ) + { + os.write( (const char*)allocation, size ); + os.close(); + } + + dispatch().clEnqueueSVMUnmap( + command_queue, + allocation, + 0, + NULL, + NULL ); + } + } + else if( m_BufferInfoMap.find( memobj ) != m_BufferInfoMap.end() ) + { + size_t size = m_BufferInfoMap[ memobj ]; + + cl_int error = CL_SUCCESS; + void* ptr = dispatch().clEnqueueMapBuffer( + command_queue, + memobj, + CL_TRUE, + CL_MAP_READ, + 0, + size, + 0, + NULL, + NULL, + &error ); + if( error == CL_SUCCESS ) + { + std::ofstream os; + os.open( + fileName.c_str(), + std::ios::out | std::ios::binary ); + + if( os.good() ) + { + os.write( (const char*)ptr, size ); + os.close(); + } + + dispatch().clEnqueueUnmapMemObject( + command_queue, + memobj, + ptr, + 0, + NULL, + NULL ); + } + } + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpImagesForKernel( + const std::string& name, + cl_kernel kernel, + cl_command_queue command_queue ) +{ + m_OS.EnterCriticalSection(); + + std::string fileNamePrefix = ""; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileNamePrefix ); + fileNamePrefix += "/memDump"; + fileNamePrefix += name; + fileNamePrefix += "Enqueue/"; + } + + // Now make directories as appropriate. 
+ { + OS().MakeDumpDirectories( fileNamePrefix ); + } + + CKernelArgMemMap& kernelArgMemMap = m_KernelArgMap[ kernel ]; + CKernelArgMemMap::iterator i = kernelArgMemMap.begin(); + + while( i != kernelArgMemMap.end() ) + { + CLI_C_ASSERT( sizeof(void*) == sizeof(cl_mem) ); + + cl_uint arg_index = (*i).first; + cl_mem memobj = (cl_mem)(*i).second; + + ++i; + + if( m_ImageInfoMap.find( memobj ) != m_ImageInfoMap.end() ) + { + const SImageInfo& info = m_ImageInfoMap[ memobj ]; + unsigned int number = m_MemAllocNumberMap[ memobj ]; + + std::string fileName = fileNamePrefix; + char tmpStr[ MAX_PATH ]; + + // Add the enqueue count to file name + { + CLI_SPRINTF( tmpStr, MAX_PATH, "%04u", + (unsigned int)m_EnqueueCounter ); + + fileName += "Enqueue_"; + fileName += tmpStr; + } + + // Add the kernel name to the filename + { + fileName += "_Kernel_"; + fileName += m_KernelNameMap[ kernel ]; + } + + // Add the arg number to the file name + { + CLI_SPRINTF( tmpStr, MAX_PATH, "%u", arg_index ); + + fileName += "_Arg_"; + fileName += tmpStr; + } + + // Add the image number to the file name + { + CLI_SPRINTF( tmpStr, MAX_PATH, "%04u", number ); + + fileName += "_Image_"; + fileName += tmpStr; + } + + // Add the image dimensions to the file name + { + CLI_SPRINTF( tmpStr, MAX_PATH, "_%ux%ux%u_%ubpp", + (unsigned int)info.Region[0], + (unsigned int)info.Region[1], + (unsigned int)info.Region[2], + (unsigned int)info.ElementSize * 8 ); + + fileName += tmpStr; + } + + // Add extension to file name + { + fileName += ".raw"; + } + + // Dump the image contents to the file. 
+ { + size_t size = + info.Region[0] * + info.Region[1] * + info.Region[2] * + info.ElementSize; + char* readImageData = new char[ size ]; + + if( readImageData ) + { + size_t origin[3] = { 0, 0, 0 }; + cl_int error = dispatch().clEnqueueReadImage( + command_queue, + memobj, + CL_TRUE, + origin, + info.Region, + 0, + 0, + readImageData, + 0, + NULL, + NULL ); + + if( error == CL_SUCCESS ) + { + std::ofstream os; + os.open( + fileName.c_str(), + std::ios::out | std::ios::binary ); + + if( os.good() ) + { + os.write( readImageData, size ); + os.close(); + } + } + + delete [] readImageData; + } + } + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpBuffer( + const std::string& name, + cl_mem memobj, + cl_command_queue command_queue, + void* ptr, + size_t offset, + size_t size ) +{ + m_OS.EnterCriticalSection(); + + if( m_BufferInfoMap.find( memobj ) != m_BufferInfoMap.end() ) + { + std::string fileName = ""; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/memDumpCreateMapUnmap/"; + } + + // Now make directories as appropriate. 
+ { + OS().MakeDumpDirectories( fileName ); + } + + fileName += name; + + // Add the buffer number to the file name + { + unsigned int number = m_MemAllocNumberMap[ memobj ]; + + char bufferName[ MAX_PATH ]; + + CLI_SPRINTF( bufferName, MAX_PATH, "%04u", number ); + + fileName += "_Buffer_"; + fileName += bufferName; + } + + // Add the offset to the file name + { + char offsetName[ MAX_PATH ]; + + CLI_SPRINTF( offsetName, MAX_PATH, "%04u", + (unsigned int)offset ); + + fileName += "_Offset_"; + fileName += offsetName; + } + + // Add the enqueue count to file name + { + char enqueueCount[ MAX_PATH ]; + + CLI_SPRINTF( enqueueCount, MAX_PATH, "%04u", + (unsigned int)m_EnqueueCounter ); + + fileName += "_Enqueue_"; + fileName += enqueueCount; + } + + // Add extension to file name + { + fileName += ".bin"; + } + + // Dump the buffer contents to the file. + // There are two possibilities: + // 1) We have a pointer and size already. This might happen + // when the buffer is being created or was just mapped. + // In this case, we can just write this to the file. + // 2) We have no pointer or size. This usually happens when + // the buffer is being unmapped. In this case, we'll + // map and dump the entire buffer. + if( ptr != NULL && size != 0 ) + { + std::ofstream os; + os.open( + fileName.c_str(), + std::ios::out | std::ios::binary ); + + if( os.good() ) + { + os.write( (const char*)ptr, size ); + os.close(); + } + } + else + { + // We should have checked this already... 
+ CLI_ASSERT( m_BufferInfoMap.find( memobj ) != m_BufferInfoMap.end() ); + + size_t size = m_BufferInfoMap[ memobj ]; + + cl_int error = CL_SUCCESS; + ptr = dispatch().clEnqueueMapBuffer( + command_queue, + memobj, + CL_TRUE, + CL_MAP_READ, + 0, + size, + 0, + NULL, + NULL, + &error ); + if( error == CL_SUCCESS ) + { + std::ofstream os; + os.open( + fileName.c_str(), + std::ios::out | std::ios::binary ); + + if( os.good() ) + { + os.write( (const char*)ptr, size ); + os.close(); + } + + dispatch().clEnqueueUnmapMemObject( + command_queue, + memobj, + ptr, + 0, + NULL, + NULL ); + } + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::startAubCapture( + const std::string& functionName, + const cl_kernel kernel, + const cl_uint workDim, + const size_t* gws, + const size_t* lws, + cl_command_queue command_queue ) +{ + if( m_AubCaptureStarted == false ) + { + m_OS.EnterCriticalSection(); + + // For kernels, perform aub capture skip checks. We'll skip aubcapture if: + // - the current skip counter is less than the specified skip counter, or + // - the current capture counter is greater than or equal to the specified capture counter. 
+ + bool skip = false; + if( kernel != NULL ) + { + if( m_AubCaptureKernelEnqueueSkipCounter < m_Config.AubCaptureNumKernelEnqueuesSkip ) + { + logf( "Skipping kernel aub capture: current skip counter is %u, requested skip counter is %u.\n", + m_AubCaptureKernelEnqueueSkipCounter, + m_Config.AubCaptureNumKernelEnqueuesSkip ); + + skip = true; + ++m_AubCaptureKernelEnqueueSkipCounter; + } + else + { + if( m_AubCaptureKernelEnqueueCaptureCounter >= m_Config.AubCaptureNumKernelEnqueuesCapture ) + { + logf( "Skipping kernel aub capture: current capture counter is %u, requested capture counter is %u.\n", + m_AubCaptureKernelEnqueueCaptureCounter, + m_Config.AubCaptureNumKernelEnqueuesCapture ); + skip = true; + } + + ++m_AubCaptureKernelEnqueueCaptureCounter; + } + } + + if( skip == false && + m_AubCaptureStarted == false ) + { + // Try to call clFinish() on the passed-in command queue. + // This isn't perfect, since we'd really rather call + // clFinish on all command queues to start with a fresh + // capture, but it's better than nothing. + // TODO: Is Flush() sufficient? + dispatch().clFinish( command_queue ); + + char charBuf[ MAX_PATH ]; + + std::string fileName = ""; + + // Get the dump directory name. 
+ { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/"; + fileName += "AubCapture"; + + if( m_Config.AubCaptureIndividualEnqueues ) + { + fileName += "_Enqueue_"; + + CLI_SPRINTF( charBuf, MAX_PATH, "%08u", (cl_uint)m_EnqueueCounter ); + + fileName += charBuf; + fileName += "_"; + + if( kernel ) + { + const std::string& kernelName = m_KernelNameMap[ kernel ]; + fileName += "kernel_"; + fileName += kernelName; + + std::ostringstream ss; + ss << "_G_"; + if( gws ) + { + if( workDim >= 1 ) + { + ss << gws[0]; + } + if( workDim >= 2 ) + { + ss << "x" << gws[1]; + } + if( workDim >= 3 ) + { + ss << "x" << gws[2]; + } + } + else + { + ss << "NULL"; + } + ss << "_L_"; + if( lws ) + { + if( workDim >= 1 ) + { + ss << lws[0]; + } + if( workDim >= 2 ) + { + ss << "x" << lws[1]; + } + if( workDim >= 3 ) + { + ss << "x" << lws[2]; + } + } + else + { + ss << "NULL"; + } + fileName += ss.str(); + } + else + { + fileName += functionName; + } + } + else if( m_Config.AubCaptureMinEnqueue != 0 || + m_Config.AubCaptureMaxEnqueue != UINT_MAX ) + { + fileName += "_Enqueue_"; + + CLI_SPRINTF( charBuf, MAX_PATH, "%08u", m_Config.AubCaptureMinEnqueue ); + + fileName += charBuf; + fileName += "_to_"; + + CLI_SPRINTF( charBuf, MAX_PATH, "%08u", m_Config.AubCaptureMaxEnqueue ); + + fileName += charBuf; + } + + fileName += ".daf"; + } + + // Now make directories as appropriate. + { + OS().MakeDumpDirectories( fileName ); + } + + OS().StartAubCapture( + fileName, + config().AubCaptureStartWait ); + log( "AubCapture started... maybe. Filename is: " + fileName + "\n" ); + + // No matter what, set the flag that aubcapture is started, so we + // don't try again. 
+ m_AubCaptureStarted = true; + } + + m_OS.LeaveCriticalSection(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::stopAubCapture( + cl_command_queue command_queue ) +{ + if( m_AubCaptureStarted == true ) + { + m_OS.EnterCriticalSection(); + + if( m_AubCaptureStarted == true ) + { + if( command_queue ) + { + dispatch().clFinish( command_queue ); + } + + OS().StopAubCapture( + config().AubCaptureEndWait ); + log( "AubCapture stopped.\n" ); + + // No matter what, clear the flag that aubcapture is started, so we + // don't try to stop it again. + m_AubCaptureStarted = false; + } + + m_OS.LeaveCriticalSection(); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::initPrecompiledKernelOverrides( + const cl_context context ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + // Check to see if overrides already exist. If they do, release them. + // NOTE(review): the status of the last release call is left in errorCode, + // which the creation steps below test before running - confirm that a + // failed release is meant to block re-initialization. + SPrecompiledKernelOverrides* pOverrides = + m_PrecompiledKernelOverridesMap[ context ]; + if( pOverrides ) + { + errorCode = dispatch().clReleaseKernel( pOverrides->Kernel_CopyBufferBytes ); + errorCode = dispatch().clReleaseKernel( pOverrides->Kernel_CopyBufferUInts ); + errorCode = dispatch().clReleaseKernel( pOverrides->Kernel_CopyBufferUInt4s ); + errorCode = dispatch().clReleaseKernel( pOverrides->Kernel_CopyBufferUInt16s ); + + errorCode = dispatch().clReleaseKernel( pOverrides->Kernel_CopyImage2Dto2DFloat ); + errorCode = dispatch().clReleaseKernel( pOverrides->Kernel_CopyImage2Dto2DInt ); + errorCode = dispatch().clReleaseKernel( pOverrides->Kernel_CopyImage2Dto2DUInt ); + + errorCode = dispatch().clReleaseProgram( pOverrides->Program ); + + delete pOverrides; + pOverrides = NULL; + + m_PrecompiledKernelOverridesMap[ context ] = NULL; + } + + // Allocate new overrides.
+ pOverrides = new SPrecompiledKernelOverrides; + if( pOverrides ) + { + pOverrides->Program = NULL; + + pOverrides->Kernel_CopyBufferBytes = NULL; + pOverrides->Kernel_CopyBufferUInts = NULL; + pOverrides->Kernel_CopyBufferUInt4s = NULL; + pOverrides->Kernel_CopyBufferUInt16s = NULL; + + pOverrides->Kernel_CopyImage2Dto2DFloat = NULL; + pOverrides->Kernel_CopyImage2Dto2DInt = NULL; + pOverrides->Kernel_CopyImage2Dto2DUInt = NULL; + + const char* pProgramString = NULL; + size_t programStringLength = 0; + + // Get the program string from the resource embedded into this DLL. + if( errorCode == CL_SUCCESS ) + { + if( m_OS.GetPrecompiledKernelString( + pProgramString, + programStringLength ) == false ) + { + errorCode = CL_INVALID_VALUE; + } + } + + // Create the program: + if( errorCode == CL_SUCCESS ) + { + pOverrides->Program = dispatch().clCreateProgramWithSource( + context, + 1, + &pProgramString, + &programStringLength, + &errorCode ); + } + + // Build the program: + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clBuildProgram( + pOverrides->Program, + 0, + NULL, + NULL, + NULL, + NULL ); + + if( errorCode != CL_SUCCESS ) + { + cl_int tempErrorCode = CL_SUCCESS; + + // Get the number of devices for this context. 
+ cl_uint numDevices = 0; + tempErrorCode = dispatch().clGetContextInfo( + context, + CL_CONTEXT_NUM_DEVICES, + sizeof( numDevices ), + &numDevices, + NULL ); + + if( numDevices != 0 ) + { + cl_device_id* devices = new cl_device_id[ numDevices ]; + if( devices ) + { + tempErrorCode = dispatch().clGetContextInfo( + context, + CL_CONTEXT_DEVICES, + numDevices * sizeof( cl_device_id ), + devices, + NULL ); + + if( tempErrorCode == CL_SUCCESS ) + { + cl_uint i = 0; + for( i = 0; i < numDevices; i++ ) + { + size_t buildLogSize = 0; + dispatch().clGetProgramBuildInfo( + pOverrides->Program, + devices[ i ], + CL_PROGRAM_BUILD_LOG, + 0, + NULL, + &buildLogSize ); + + char* buildLog = new char[ buildLogSize + 1 ]; + if( buildLog ) + { + dispatch().clGetProgramBuildInfo( + pOverrides->Program, + devices[ i ], + CL_PROGRAM_BUILD_LOG, + buildLogSize * sizeof( char ), + buildLog, + NULL ); + + buildLog[ buildLogSize ] = '\0'; + + log( "-------> Start of Build Log:\n" ); + log( buildLog ); + log( "<------- End of Build Log!\n" ); + + delete [] buildLog; + } + } + } + } + } + } + } + + // Create all of the kernels in the program: + + if( config().OverrideReadBuffer || + config().OverrideWriteBuffer || + config().OverrideCopyBuffer ) + { + if( errorCode == CL_SUCCESS ) + { + pOverrides->Kernel_CopyBufferBytes = dispatch().clCreateKernel( + pOverrides->Program, + "CopyBufferBytes", + &errorCode ); + } + if( errorCode == CL_SUCCESS ) + { + pOverrides->Kernel_CopyBufferUInts = dispatch().clCreateKernel( + pOverrides->Program, + "CopyBufferUInts", + &errorCode ); + } + if( errorCode == CL_SUCCESS ) + { + pOverrides->Kernel_CopyBufferUInt4s = dispatch().clCreateKernel( + pOverrides->Program, + "CopyBufferUInt4s", + &errorCode ); + } + if( errorCode == CL_SUCCESS ) + { + pOverrides->Kernel_CopyBufferUInt16s = dispatch().clCreateKernel( + pOverrides->Program, + "CopyBufferUInt16s", + &errorCode ); + } + } + + if( config().OverrideReadImage || + config().OverrideWriteImage || + 
config().OverrideCopyImage ) + { + // TODO: Check to see if images are supported? + // What should happen if this is a multiple-device context, + // and one device supports images, but another doesn't? + + if( errorCode == CL_SUCCESS ) + { + pOverrides->Kernel_CopyImage2Dto2DFloat = dispatch().clCreateKernel( + pOverrides->Program, + "CopyImage2Dto2DFloat", + &errorCode ); + } + if( errorCode == CL_SUCCESS ) + { + pOverrides->Kernel_CopyImage2Dto2DInt = dispatch().clCreateKernel( + pOverrides->Program, + "CopyImage2Dto2DInt", + &errorCode ); + } + if( errorCode == CL_SUCCESS ) + { + pOverrides->Kernel_CopyImage2Dto2DUInt = dispatch().clCreateKernel( + pOverrides->Program, + "CopyImage2Dto2DUInt", + &errorCode ); + } + } + + if( errorCode == CL_SUCCESS ) + { + m_PrecompiledKernelOverridesMap[ context ] = pOverrides; + } + else + { + delete pOverrides; + pOverrides = NULL; + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::initBuiltinKernelOverrides( + const cl_context context ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + // Check to see if overrides already exist. If they do, release them. + SBuiltinKernelOverrides* pOverrides = + m_BuiltinKernelOverridesMap[ context ]; + if( pOverrides ) + { + errorCode = dispatch().clReleaseKernel( pOverrides->Kernel_block_motion_estimate_intel ); + + errorCode = dispatch().clReleaseProgram( pOverrides->Program ); + + delete pOverrides; + pOverrides = NULL; + + m_BuiltinKernelOverridesMap[ context ] = NULL; + } + + // Allocate new overrides. + pOverrides = new SBuiltinKernelOverrides; + if( pOverrides ) + { + pOverrides->Program = NULL; + + pOverrides->Kernel_block_motion_estimate_intel = NULL; + + const char* pProgramString = NULL; + size_t programStringLength = 0; + + // Get the program string from the resource embedded into this DLL. 
+ if( errorCode == CL_SUCCESS ) + { + if( m_OS.GetBuiltinKernelString( + pProgramString, + programStringLength ) == false ) + { + errorCode = CL_INVALID_VALUE; + } + } + + // Create the program: + if( errorCode == CL_SUCCESS ) + { + pOverrides->Program = dispatch().clCreateProgramWithSource( + context, + 1, + &pProgramString, + &programStringLength, + &errorCode ); + } + + // Build the program: + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clBuildProgram( + pOverrides->Program, + 0, + NULL, + "-Dcl_intel_device_side_vme_enable -DHW_NULL_CHECK", + NULL, + NULL ); + + if( errorCode != CL_SUCCESS ) + { + cl_int tempErrorCode = CL_SUCCESS; + + // Get the number of devices for this context. + cl_uint numDevices = 0; + tempErrorCode = dispatch().clGetContextInfo( + context, + CL_CONTEXT_NUM_DEVICES, + sizeof( numDevices ), + &numDevices, + NULL ); + + if( numDevices != 0 ) + { + cl_device_id* devices = new cl_device_id[ numDevices ]; + if( devices ) + { + tempErrorCode = dispatch().clGetContextInfo( + context, + CL_CONTEXT_DEVICES, + numDevices * sizeof( cl_device_id ), + devices, + NULL ); + + if( tempErrorCode == CL_SUCCESS ) + { + cl_uint i = 0; + for( i = 0; i < numDevices; i++ ) + { + size_t buildLogSize = 0; + dispatch().clGetProgramBuildInfo( + pOverrides->Program, + devices[ i ], + CL_PROGRAM_BUILD_LOG, + 0, + NULL, + &buildLogSize ); + + char* buildLog = new char[ buildLogSize + 1 ]; + if( buildLog ) + { + dispatch().clGetProgramBuildInfo( + pOverrides->Program, + devices[ i ], + CL_PROGRAM_BUILD_LOG, + buildLogSize * sizeof( char ), + buildLog, + NULL ); + + buildLog[ buildLogSize ] = '\0'; + + log( "-------> Start of Build Log:\n" ); + log( buildLog ); + log( "<------- End of Build Log!\n" ); + + delete [] buildLog; + } + } + } + } + } + } + } + + // Create all of the kernels in the program: + + if( errorCode == CL_SUCCESS ) + { + pOverrides->Kernel_block_motion_estimate_intel = dispatch().clCreateKernel( + pOverrides->Program, + 
"block_motion_estimate_intel", + &errorCode ); + } + + if( errorCode == CL_SUCCESS ) + { + m_BuiltinKernelOverridesMap[ context ] = pOverrides; + } + else + { + delete pOverrides; + pOverrides = NULL; + } + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_program CLIntercept::createProgramWithInjectionBinaries( + uint64_t hash, + cl_context context, + cl_int* errcode_ret ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + cl_program program = NULL; + + std::string fileName1; + std::string fileName2; + size_t numDevices = 0; + + if( errorCode == CL_SUCCESS ) + { + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + fileName += "/Inject"; + } + // Make two candidate filenames. They will have the form: + // CLI___0000, or + // CLI__0000 + // Leave off the extension for now. + { + char numberString1[256] = ""; + CLI_SPRINTF( numberString1, 256, "%04u_%08X_0000", + m_ProgramNumber, + (unsigned int)hash ); + + char numberString2[256] = ""; + CLI_SPRINTF( numberString2, 256, "%08X_0000", + (unsigned int)hash ); + + fileName1 = fileName; + fileName1 += "/CLI_"; + fileName1 += numberString1; + + fileName2 = fileName; + fileName2 += "/CLI_"; + fileName2 += numberString2; + } + + errorCode = dispatch().clGetContextInfo( + context, + CL_CONTEXT_DEVICES, + 0, + NULL, + &numDevices ); + } + + cl_device_id* devices = NULL; + + char** programBinaries = NULL; + size_t* programBinarySizes = NULL; + + if( errorCode == CL_SUCCESS ) + { + numDevices = numDevices / sizeof( cl_device_id ); + + devices = new cl_device_id [ numDevices ]; + + programBinaries = new char*[ numDevices ]; + programBinarySizes = new size_t[ numDevices ]; + + if( ( devices == NULL ) || + ( programBinaries == NULL ) || + ( programBinarySizes == NULL ) ) + { + CLI_ASSERT( 0 ); + errorCode = CL_OUT_OF_HOST_MEMORY; + } + } + + if( errorCode == 
CL_SUCCESS ) + { + for( size_t i = 0; i < numDevices; i++ ) + { + programBinaries[i] = NULL; + } + + errorCode = dispatch().clGetContextInfo( + context, + CL_CONTEXT_DEVICES, + numDevices * sizeof( cl_device_id ), + devices, + NULL ); + + if( errorCode == CL_SUCCESS ) + { + // Assume all binaries exist, until this is proven otherwise. + bool allBinariesExist = true; + + for( size_t i = 0; i < numDevices; i++ ) + { + cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT; + + // It's OK if this fails. If it does, it just + // means that our output file won't have a device + // type. + dispatch().clGetDeviceInfo( + devices[ i ], + CL_DEVICE_TYPE, + sizeof( deviceType ), + &deviceType, + NULL ); + + std::string suffix; + + if( deviceType & CL_DEVICE_TYPE_CPU ) + { + suffix += "_CPU"; + } + if( deviceType & CL_DEVICE_TYPE_GPU ) + { + suffix += "_GPU"; + } + if( deviceType & CL_DEVICE_TYPE_ACCELERATOR ) + { + suffix += "_ACCELERATOR"; + } + if( deviceType & CL_DEVICE_TYPE_CUSTOM ) + { + suffix += "_CUSTOM"; + } + + suffix += ".bin"; + + std::string inputFileName = fileName1 + suffix; + + std::ifstream is; + is.open( + inputFileName.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injection binary file exists: " + inputFileName + "\n" ); + } + else + { + log( "Injection binary file doesn't exist: " + inputFileName + "\n" ); + + inputFileName = fileName2 + suffix; + is.clear(); + is.open( + inputFileName.c_str(), + std::ios::in | std::ios::binary ); + if( is.good() ) + { + log( "Injection binary file exists: " + inputFileName + "\n" ); + } + else + { + log( "Injection binary file doesn't exist: " + inputFileName + "\n" ); + } + } + + if( is.good() ) + { + // The file exists. Figure out how big it is. 
+ is.seekg( 0, std::ios::end ); + programBinarySizes[i] = (size_t)is.tellg(); + is.seekg( 0, std::ios::beg ); + + programBinaries[i] = new char[ programBinarySizes[i] ]; + if( programBinaries[i] == NULL ) + { + CLI_ASSERT( 0 ); + errorCode = CL_OUT_OF_HOST_MEMORY; + } + else + { + is.read( programBinaries[i], programBinarySizes[i] ); + } + + is.close(); + } + else + { + log( "Injection binary is missing!\n" ); + allBinariesExist = false; + } + } + + if( allBinariesExist && + ( errorCode == CL_SUCCESS ) ) + { + log( "All injection binaries exist.\n" ); + + program = dispatch().clCreateProgramWithBinary( + context, + (cl_uint)numDevices, + devices, + programBinarySizes, + (const unsigned char**)programBinaries, + NULL, // binary_status + &errorCode ); + if( program ) + { + logf("Injection successful: clCreateProgramWithBinary() returned %p\n", + program ); + } + if( errorCode != CL_SUCCESS ) + { + log( "Injecting binaries failed: clCreateProgramWithBinary() returned %s\n" + + enumName().name( errorCode ) + "\n" ); + } + } + + for( size_t i = 0; i < numDevices; i++ ) + { + programBinarySizes[i] = 0; + + delete [] programBinaries[i]; + programBinaries[i] = NULL; + } + } + } + + delete [] devices; + + delete [] programBinaries; + delete [] programBinarySizes; + + if( errcode_ret ) + { + errcode_ret[0] = errorCode; + } + + m_OS.LeaveCriticalSection(); + return program; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::dumpProgramBinary( + const cl_program program ) +{ + m_OS.EnterCriticalSection(); + + unsigned int programNumber = m_ProgramNumberMap[ program ]; + uint64_t programHash = m_ProgramHashMap[ program ]; + unsigned int compileCount = m_ProgramNumberCompileCountMap[ programNumber ]; + + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileName ); + } + // Make the filename. 
It will have the form: + // CLI_<program number>_<hash>_<compile count> + // (the program number part is left out when OmitProgramNumber is set). + // Leave off the extension for now. + { + char numberString[256] = ""; + + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( numberString, 256, "%08X_%04u", + (unsigned int)programHash, + compileCount ); + } + else + { + CLI_SPRINTF( numberString, 256, "%04u_%08X_%04u", + programNumber, + (unsigned int)programHash, + compileCount ); + } + + fileName += "/CLI_"; + fileName += numberString; + } + // Now make directories as appropriate. + { + OS().MakeDumpDirectories( fileName ); + } + + cl_int errorCode = CL_SUCCESS; + + size_t numDevices = 0; + + if( errorCode == CL_SUCCESS ) + { + // Get all of the devices associated with this program. + errorCode = dispatch().clGetProgramInfo( + program, + CL_PROGRAM_DEVICES, + 0, + NULL, + &numDevices ); + } + + cl_device_id* devices = NULL; + char** programBinaries = NULL; + size_t* programBinarySizes = NULL; + + if( errorCode == CL_SUCCESS ) + { + numDevices /= sizeof( cl_device_id ); + + devices = new cl_device_id[ numDevices ]; + programBinaries = new char*[ numDevices ]; + programBinarySizes = new size_t[ numDevices ]; + + if( ( devices == NULL ) || + ( programBinaries == NULL ) || + ( programBinarySizes == NULL ) ) + { + CLI_ASSERT( 0 ); + errorCode = CL_OUT_OF_HOST_MEMORY; + } + } + + if( errorCode == CL_SUCCESS ) + { + for( size_t i = 0; i < numDevices; i++ ) + { + programBinaries[i] = NULL; + } + + errorCode = dispatch().clGetProgramInfo( + program, + CL_PROGRAM_DEVICES, + numDevices * sizeof( cl_device_id ), + devices, + NULL ); + } + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetProgramInfo( + program, + CL_PROGRAM_BINARY_SIZES, + numDevices * sizeof( size_t ), + programBinarySizes, + NULL ); + } + + if( errorCode == CL_SUCCESS ) + { + for( size_t i = 0; i < numDevices; i++ ) + { + programBinaries[ i ] = new char[ programBinarySizes[ i ] ]; + if( programBinaries[ i ] == NULL ) + { + errorCode = CL_OUT_OF_HOST_MEMORY; + break; + } + } + + if( errorCode ==
CL_SUCCESS ) + { + errorCode = dispatch().clGetProgramInfo( + program, + CL_PROGRAM_BINARIES, + numDevices * sizeof( char* ), + programBinaries, + NULL ); + } + + if( errorCode == CL_SUCCESS ) + { + for( size_t i = 0; i < numDevices; i++ ) + { + cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT; + + // It's OK if this fails. If it does, it just + // means that our output file won't have a device + // type. + dispatch().clGetDeviceInfo( + devices[ i ], + CL_DEVICE_TYPE, + sizeof( deviceType ), + &deviceType, + NULL ); + + std::string outputFileName = fileName; + + if( deviceType & CL_DEVICE_TYPE_CPU ) + { + outputFileName += "_CPU"; + } + if( deviceType & CL_DEVICE_TYPE_GPU ) + { + outputFileName += "_GPU"; + } + if( deviceType & CL_DEVICE_TYPE_ACCELERATOR ) + { + outputFileName += "_ACCELERATOR"; + } + if( deviceType & CL_DEVICE_TYPE_CUSTOM ) + { + outputFileName += "_CUSTOM"; + } + + outputFileName += ".bin"; + + std::ofstream os; + os.open( + outputFileName.c_str(), + std::ios::out | std::ios::binary ); + if( os.good() ) + { + log( "Dumping program binary to file: " + outputFileName + "\n" ); + + os.write( + programBinaries[ i ], + programBinarySizes[ i ] ); + os.close(); + } + } + } + + for( size_t i = 0; i < numDevices; i++ ) + { + delete [] programBinaries[ i ]; + programBinaries[ i ] = NULL; + } + } + + delete [] devices; + devices = NULL; + + delete [] programBinaries; + programBinaries = NULL; + + delete [] programBinarySizes; + programBinarySizes = NULL; + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_program CLIntercept::createProgramWithInjectionSPIRV( + uint64_t hash, + cl_context context, + cl_int* errcode_ret ) +{ + m_OS.EnterCriticalSection(); + + cl_program program = NULL; + + // Don't bother with any of this if we weren't able to get a pointer to + // the entry point to create a program with IL. 
+ if( dispatch().clCreateProgramWithIL == NULL ) + { + log( "Aborting InjectProgramSPIRV because clCreateProgramWithIL is NULL!\n" ); + } + else + { + std::string fileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName(sc_DumpDirectoryName, fileName); + fileName += "/Inject"; + } + + // Make two candidate filenames. They will have the form: + // CLI_<program number>_<hash>_0000.spv, or + // CLI_<hash>_0000.spv + // The first candidate is tried first; the second is the fallback. + { + char numberString1[256] = ""; + CLI_SPRINTF(numberString1, 256, "%04u_%08X_0000", + m_ProgramNumber, + (unsigned int)hash); + + char numberString2[256] = ""; + CLI_SPRINTF(numberString2, 256, "%08X_0000", + (unsigned int)hash); + + std::string fileName1; + fileName1 = fileName; + fileName1 += "/CLI_"; + fileName1 += numberString1; + fileName1 += ".spv"; + + std::string fileName2; + fileName2 = fileName; + fileName2 += "/CLI_"; + fileName2 += numberString2; + fileName2 += ".spv"; + + std::ifstream is; + + is.open( + fileName1.c_str(), + std::ios::in | std::ios::binary); + if( is.good() ) + { + log("Injecting SPIR-V file: " + fileName1 + "\n"); + } + else + { + log("Injection SPIR-V file doesn't exist: " + fileName1 + "\n"); + + is.clear(); + is.open( + fileName2.c_str(), + std::ios::in | std::ios::binary); + if( is.good() ) + { + log("Injecting SPIR-V file: " + fileName2 + "\n"); + } + else + { + log("Injection SPIR-V file doesn't exist: " + fileName2 + "\n"); + } + } + + if( is.good() ) + { + // The file exists. Figure out how big it is. + size_t filesize = 0; + + is.seekg( 0, std::ios::end ); + filesize = (size_t)is.tellg(); + is.seekg( 0, std::ios::beg ); + + char* newILBinary = new char[ filesize ]; + if( newILBinary == NULL ) + { + CLI_ASSERT( 0 ); + } + else + { + is.read(newILBinary, filesize); + + // Right now, this can still die in the ICD loader if the ICD loader + // exports this entry point but the vendor didn't implement it.
It + // would be nice to enhance the ICD loader so it called into a safe + // stub function if the vendor didn't implement an entry point... + program = dispatch().clCreateProgramWithIL( + context, + newILBinary, + filesize, + errcode_ret ); + if( program ) + { + logf("Injection successful: clCreateProgramWithIL() returned %p\n", + program ); + } + + delete[] newILBinary; + } + + is.close(); + } + } + } + + m_OS.LeaveCriticalSection(); + return program; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::autoCreateSPIRV( + const cl_program program, + const char* raw_options ) +{ + m_OS.EnterCriticalSection(); + + unsigned int programNumber = m_ProgramNumberMap[ program ]; + uint64_t programHash = m_ProgramHashMap[ program ]; + unsigned int compileCount = m_ProgramNumberCompileCountMap[ programNumber ]; + + std::string dumpDirectoryName; + std::string inputFileName; + std::string outputFileName; + + // Get the dump directory name. + { + OS().GetDumpDirectoryName( sc_DumpDirectoryName, dumpDirectoryName ); + } + + // Re-create the input file name. This will be a program source file we dumped + // earlier. It will have the form: + // CLI___source.cl + { + char numberString[256] = ""; + + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( numberString, 256, "%08X", + (unsigned int)programHash ); + } + else + { + CLI_SPRINTF( numberString, 256, "%04u_%08X", + programNumber, + (unsigned int)programHash ); + } + + inputFileName = dumpDirectoryName; + inputFileName += "/CLI_"; + inputFileName += numberString; + inputFileName += "_source.cl"; + } + + // Make the output file name. 
It will have the form: + // CLI___.spv + { + char numberString[256] = ""; + + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( numberString, 256, "%08X_%04u", + (unsigned int)programHash, + compileCount ); + } + else + { + CLI_SPRINTF( numberString, 256, "%04u_%08X_%04u", + programNumber, + (unsigned int)programHash, + compileCount ); + } + + outputFileName = dumpDirectoryName; + outputFileName += "/CLI_"; + outputFileName += numberString; + outputFileName += ".spv"; + } + + // Now make directories as appropriate. We can use either the input + // or output file name to do this. + { + OS().MakeDumpDirectories( inputFileName ); + } + + std::string options(raw_options ? raw_options : ""); + std::string command; + + // Create the command we will use to invoke CLANG with the right options. + // How we do this will depend on whether this is an OpenCL 1.x or 2.0 + // compilation. We don't distinguish between different versions of + // OpenCL 1.x right now, but we can add this in the future, if desired. + if( options.find( "-cl-std=CL2.0" ) != std::string::npos ) + { + // This is an OpenCL 2.0 compilation. + command = + config().SPIRVClang + + " " + config().OpenCL2Options + + " -include " + config().SPIRVCLHeader + + " " + options + + " -o " + outputFileName + + " " + inputFileName; + } + else + { + // This is an OpenCL 1.x compilation. + command = + config().SPIRVClang + + " " + config().DefaultOptions + + " -include " + config().SPIRVCLHeader + + " " + options + + " -o " + outputFileName + + " " + inputFileName; + } + + logf( "Running: %s\n", command.c_str() ); + OS().ExecuteCommand( command ); + + // Optionally, run spirv-dis to disassemble the generated module. 
+ if( !config().SPIRVDis.empty() ) + { + command = + config().SPIRVDis + + " -o " + outputFileName + "t" + + " " + outputFileName; + + logf( "Running: %s\n", command.c_str() ); + OS().ExecuteCommand( command ); + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::writeStringToMemory( + size_t param_value_size, + const std::string& param, + size_t* param_value_size_ret, + char* pointer ) const +{ + cl_int errorCode = CL_SUCCESS; + + size_t length = param.length() + 1; + + if( pointer != NULL ) + { + if( param_value_size < length ) + { + errorCode = CL_INVALID_VALUE; + } + else + { + strcpy_s( + pointer, + length, + param.c_str() ); + } + } + + if( param_value_size_ret != NULL ) + { + *param_value_size_ret = length; + } + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +template< class T > +cl_int CLIntercept::writeParamToMemory( + size_t param_value_size, + T param, + size_t *param_value_size_ret, + T* pointer ) const +{ + cl_int errorCode = CL_SUCCESS; + + if( pointer != NULL ) + { + if( param_value_size < sizeof(param) ) + { + errorCode = CL_INVALID_VALUE; + } + else + { + *pointer = param; + } + } + + if( param_value_size_ret != NULL ) + { + *param_value_size_ret = sizeof(param); + } + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::overrideGetPlatformInfo( + cl_platform_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret, + cl_int& errorCode ) +{ + bool override = false; + + m_OS.EnterCriticalSection(); + + switch( param_name ) + { + case CL_PLATFORM_NAME: + if( m_Config.PlatformName != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.PlatformName, + param_value_size_ret, + ptr ); + override = true; + } + break; + case 
CL_PLATFORM_VENDOR: + if( m_Config.PlatformVendor != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.PlatformVendor, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_PLATFORM_PROFILE: + if( m_Config.PlatformProfile != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.PlatformProfile, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_PLATFORM_VERSION: + if( m_Config.PlatformVersion != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.PlatformVersion, + param_value_size_ret, + ptr ); + override = true; + } + break; + default: + break; + } + + m_OS.LeaveCriticalSection(); + + return override; +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::overrideGetDeviceInfo( + cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret, + cl_int& errorCode ) +{ + bool override = false; + + m_OS.EnterCriticalSection(); + + switch( param_name ) + { + case CL_DEVICE_TYPE: + if( m_Config.DeviceType != 0 ) + { + cl_device_type* ptr = (cl_device_type*)param_value; + cl_device_type d = m_Config.DeviceType; + errorCode = writeParamToMemory( + param_value_size, + d, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_NAME: + if( m_Config.DeviceName != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.DeviceName, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_EXTENSIONS: + if( m_Config.DeviceExtensions != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.DeviceExtensions, + param_value_size_ret, + ptr ); + override = true; + } + break; + case 
CL_DEVICE_VENDOR: + if( m_Config.DeviceVendor != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.DeviceVendor, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_PROFILE: + if( m_Config.DeviceProfile != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.DeviceProfile, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_VERSION: + if( m_Config.DeviceVersion != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.DeviceVersion, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_OPENCL_C_VERSION: + if( m_Config.DeviceCVersion != "" ) + { + char* ptr = (char*)param_value; + errorCode = writeStringToMemory( + param_value_size, + m_Config.DeviceCVersion, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_VENDOR_ID: + if( m_Config.DeviceVendorID != 0 ) + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + m_Config.DeviceVendorID, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_MAX_COMPUTE_UNITS: + if( m_Config.DeviceMaxComputeUnits != 0 ) + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + m_Config.DeviceMaxComputeUnits, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: + if( m_Config.DevicePreferredVectorWidthChar != UINT_MAX ) + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + m_Config.DevicePreferredVectorWidthChar, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: + if( m_Config.DevicePreferredVectorWidthShort != UINT_MAX ) + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = 
writeParamToMemory( + param_value_size, + m_Config.DevicePreferredVectorWidthShort, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: + if( m_Config.DevicePreferredVectorWidthInt != UINT_MAX ) + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + m_Config.DevicePreferredVectorWidthInt, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: + if( m_Config.DevicePreferredVectorWidthLong != UINT_MAX ) + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + m_Config.DevicePreferredVectorWidthLong, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: + if( m_Config.DevicePreferredVectorWidthHalf != UINT_MAX ) + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + m_Config.DevicePreferredVectorWidthHalf, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: + if( m_Config.DevicePreferredVectorWidthFloat != UINT_MAX ) + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + m_Config.DevicePreferredVectorWidthFloat, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: + if( m_Config.DevicePreferredVectorWidthDouble != UINT_MAX ) + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + m_Config.DevicePreferredVectorWidthDouble, + param_value_size_ret, + ptr ); + override = true; + } + break; +#if 0 + // This is a hack to get Sandra to try to compile fp64 + // kernels on devices that do not report fp64 capabilities. 
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + (cl_uint)1, + param_value_size_ret, + ptr ); + override = true; + } + break; + case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: + { + cl_uint* ptr = (cl_uint*)param_value; + errorCode = writeParamToMemory( + param_value_size, + (cl_uint)1, + param_value_size_ret, + ptr ); + override = true; + } + break; +#endif +#if 0 + // This is a hack to get fp16 conformance tests to run on + // Broadwell. + case CL_DEVICE_HALF_FP_CONFIG: + { + cl_device_fp_config value = + CL_FP_ROUND_TO_NEAREST | + CL_FP_ROUND_TO_ZERO | + CL_FP_INF_NAN | + CL_FP_ROUND_TO_INF; + + cl_device_fp_config* ptr = (cl_device_fp_config*)param_value; + errorCode = writeParamToMemory( + param_value_size, + value, + param_value_size_ret, + ptr ); + override = true; + } + break; +#endif +#if 0 + // This is a hack to get fp32 denormal tests to run on Broadwell. + case CL_DEVICE_SINGLE_FP_CONFIG: + { + cl_device_fp_config value = 0; + errorCode = dispatch().clGetDeviceInfo( + device, + param_name, + sizeof(value), + &value, + NULL ); + if( errorCode == CL_SUCCESS ) + { + value |= CL_FP_DENORM; + + cl_device_fp_config* ptr = (cl_device_fp_config*)param_value; + errorCode = writeParamToMemory( + param_value_size, + value, + param_value_size_ret, + ptr ); + override = true; + } + + } + break; +#endif + default: + break; + } + + m_OS.LeaveCriticalSection(); + + return override; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::ReadBuffer( + cl_command_queue commandQueue, + cl_mem srcBuffer, + cl_bool blockingRead, + size_t srcOffset, + size_t bytesToRead, + void* dstPtr, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + cl_context context = NULL; + + // Get the context for this command queue. 
+ if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + commandQueue, + CL_QUEUE_CONTEXT, + sizeof( context ), + &context, + NULL ); + } + + size_t dstOffset = 0; + + // Align the passed-in pointer to a page boundary. + if( errorCode == CL_SUCCESS ) + { + const size_t alignSize = 4096; + + unsigned char* bptr = (unsigned char*)dstPtr; + uintptr_t uiptr = (uintptr_t)bptr; + + dstOffset = uiptr % alignSize; + bptr -= dstOffset; + + dstPtr = bptr; + } + + cl_mem dstBuffer = NULL; + + // Create a USE_HOST_PTR buffer for the passed-in pointer. + // The size of the buffer will be at least dstOffset + bytesToRead. + if( errorCode == CL_SUCCESS ) + { + size_t dstBufferSize = dstOffset + bytesToRead; + + dstBuffer = dispatch().clCreateBuffer( + context, + CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, + dstBufferSize, + dstPtr, + &errorCode ); + } + + if( errorCode == CL_SUCCESS ) + { + errorCode = CopyBufferHelper( + context, + commandQueue, + srcBuffer, + dstBuffer, + srcOffset, + dstOffset, + bytesToRead, + numEventsInWaitList, + eventWaitList, + event ); + } + + // Technically, we need to map and unmap the destination buffer + // to transfer data to our pointer. This will also handle + // blockingRead. 
+ if( errorCode == CL_SUCCESS ) + { + void* mappedPointer = dispatch().clEnqueueMapBuffer( + commandQueue, + dstBuffer, + blockingRead, + CL_MAP_READ, + dstOffset, + bytesToRead, + 0, + NULL, + NULL, + &errorCode ); + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clEnqueueUnmapMemObject( + commandQueue, + dstBuffer, + mappedPointer, + 0, + NULL, + NULL ); + } + } + + dispatch().clReleaseMemObject( dstBuffer ); + + m_OS.LeaveCriticalSection(); + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::WriteBuffer( + cl_command_queue commandQueue, + cl_mem dstBuffer, + cl_bool blockingWrite, + size_t dstOffset, + size_t bytesToWrite, + const void* srcPtr, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + cl_context context = NULL; + + // Get the context for this command queue. + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + commandQueue, + CL_QUEUE_CONTEXT, + sizeof( context ), + &context, + NULL ); + } + + size_t srcOffset = 0; + + // Align the passed-in pointer to a page boundary. + if( errorCode == CL_SUCCESS ) + { + const size_t alignSize = 4096; + + unsigned char* bptr = (unsigned char*)srcPtr; + uintptr_t uiptr = (uintptr_t)bptr; + + srcOffset = uiptr % alignSize; + bptr -= srcOffset; + + srcPtr = bptr; + } + + cl_mem srcBuffer = NULL; + + // Create a USE_HOST_PTR buffer for the passed-in pointer. + // The size of the buffer will be at least srcOffset + bytesToWrite. 
+ if( errorCode == CL_SUCCESS ) + { + size_t srcBufferSize = srcOffset + bytesToWrite; + + srcBuffer = dispatch().clCreateBuffer( + context, + CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + srcBufferSize, + (void*)srcPtr, + &errorCode ); + } + + if( errorCode == CL_SUCCESS ) + { + errorCode = CopyBufferHelper( + context, + commandQueue, + srcBuffer, + dstBuffer, + srcOffset, + dstOffset, + bytesToWrite, + numEventsInWaitList, + eventWaitList, + event ); + } + + if( errorCode == CL_SUCCESS ) + { + if( blockingWrite ) + { + errorCode = dispatch().clFinish( + commandQueue ); + } + } + + dispatch().clReleaseMemObject( srcBuffer ); + + m_OS.LeaveCriticalSection(); + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::CopyBuffer( + cl_command_queue commandQueue, + cl_mem srcBuffer, + cl_mem dstBuffer, + size_t srcOffset, + size_t dstOffset, + size_t bytesToCopy, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + cl_context context = NULL; + + // Get the context for this command queue. 
+ if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + commandQueue, + CL_QUEUE_CONTEXT, + sizeof( context ), + &context, + NULL ); + } + + if( errorCode == CL_SUCCESS ) + { + errorCode = CopyBufferHelper( + context, + commandQueue, + srcBuffer, + dstBuffer, + srcOffset, + dstOffset, + bytesToCopy, + numEventsInWaitList, + eventWaitList, + event ); + } + + m_OS.LeaveCriticalSection(); + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::CopyBufferHelper( + cl_context context, + cl_command_queue commandQueue, + cl_mem srcBuffer, + cl_mem dstBuffer, + size_t srcOffset, + size_t dstOffset, + size_t bytesToCopy, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ) +{ + // This function assumes that it is being called from within a critical + // section, so it does not enter the critical section again. + + cl_int errorCode = CL_SUCCESS; + + SPrecompiledKernelOverrides* pOverrides = NULL; + + // Get the overrides for this context. 
+ if( errorCode == CL_SUCCESS ) + { + pOverrides = m_PrecompiledKernelOverridesMap[ context ]; + if( pOverrides == NULL ) + { + errorCode = CL_INVALID_VALUE; + } + } + + if( false && // disabled - this kernel is slower than the UInt4 kernel + ( m_Config.ForceByteBufferOverrides == false ) && + ( ( srcOffset % 64 ) == 0 ) && + ( ( dstOffset % 64 ) == 0 ) ) + { + if( errorCode == CL_SUCCESS ) + { + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt16s, + 0, + sizeof( srcBuffer ), + &srcBuffer ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt16s, + 1, + sizeof( dstBuffer ), + &dstBuffer ); + + cl_uint uiSrcOffsetInUint16s = (cl_uint)( srcOffset / 64 ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt16s, + 2, + sizeof( uiSrcOffsetInUint16s ), + &uiSrcOffsetInUint16s ); + + cl_uint uiDstOffsetInUint16s = (cl_uint)( dstOffset / 64 ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt16s, + 3, + sizeof( uiDstOffsetInUint16s ), + &uiDstOffsetInUint16s ); + + cl_uint uiBytesToCopy = (cl_uint)( bytesToCopy ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt16s, + 4, + sizeof( uiBytesToCopy ), + &uiBytesToCopy ); + + if( errorCode == CL_SUCCESS ) + { + size_t global_work_size = bytesToCopy / 64; + size_t local_work_size = 32; + + // Round up if we don't have an even multiple of UInt16s + if( ( bytesToCopy % 64 ) != 0 ) + { + global_work_size++; + } + + // Make sure global_work_size is an even multiple of local_work_size + if( ( global_work_size % local_work_size ) != 0 ) + { + global_work_size += + local_work_size - + ( global_work_size % local_work_size ); + } + + // Execute kernel + errorCode = dispatch().clEnqueueNDRangeKernel( + commandQueue, + pOverrides->Kernel_CopyBufferUInt16s, + 1, + NULL, + &global_work_size, + &local_work_size, + numEventsInWaitList, + eventWaitList, + event ); + } + } + } + else if( ( 
m_Config.ForceByteBufferOverrides == false ) && + ( ( srcOffset % 16 ) == 0 ) && + ( ( dstOffset % 16 ) == 0 ) ) + { + if( errorCode == CL_SUCCESS ) + { + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt4s, + 0, + sizeof( srcBuffer ), + &srcBuffer ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt4s, + 1, + sizeof( dstBuffer ), + &dstBuffer ); + + cl_uint uiSrcOffsetInUint4s = (cl_uint)( srcOffset / 16 ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt4s, + 2, + sizeof( uiSrcOffsetInUint4s ), + &uiSrcOffsetInUint4s ); + + cl_uint uiDstOffsetInUint4s = (cl_uint)( dstOffset / 16 ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt4s, + 3, + sizeof( uiDstOffsetInUint4s ), + &uiDstOffsetInUint4s ); + + cl_uint uiBytesToCopy = (cl_uint)( bytesToCopy ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInt4s, + 4, + sizeof( uiBytesToCopy ), + &uiBytesToCopy ); + + if( errorCode == CL_SUCCESS ) + { + size_t global_work_size = bytesToCopy / 16; + size_t local_work_size = 32; + + // Round up if we don't have an even multiple of UInt4s + if( ( bytesToCopy % 16 ) != 0 ) + { + global_work_size++; + } + + // Make sure global_work_size is an even multiple of local_work_size + if( ( global_work_size % local_work_size ) != 0 ) + { + global_work_size += + local_work_size - + ( global_work_size % local_work_size ); + } + + // Execute kernel + errorCode = dispatch().clEnqueueNDRangeKernel( + commandQueue, + pOverrides->Kernel_CopyBufferUInt4s, + 1, + NULL, + &global_work_size, + &local_work_size, + numEventsInWaitList, + eventWaitList, + event ); + } + } + } + else if( ( m_Config.ForceByteBufferOverrides == false ) && + ( ( srcOffset % 4 ) == 0 ) && + ( ( dstOffset % 4 ) == 0 ) ) + { + if( errorCode == CL_SUCCESS ) + { + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInts, + 0, + sizeof( srcBuffer ), + &srcBuffer ); + 
errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInts, + 1, + sizeof( dstBuffer ), + &dstBuffer ); + + cl_uint uiSrcOffsetInUints = (cl_uint)( srcOffset / 4 ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInts, + 2, + sizeof( uiSrcOffsetInUints ), + &uiSrcOffsetInUints ); + + cl_uint uiDstOffsetInUints = (cl_uint)( dstOffset / 4 ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInts, + 3, + sizeof( uiDstOffsetInUints ), + &uiDstOffsetInUints ); + + cl_uint uiBytesToCopy = (cl_uint)( bytesToCopy ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferUInts, + 4, + sizeof( uiBytesToCopy ), + &uiBytesToCopy ); + + if( errorCode == CL_SUCCESS ) + { + size_t global_work_size = bytesToCopy / 4; + size_t local_work_size = 32; + + // Round up if we don't have an even multiple of UInts + if( ( bytesToCopy % 4 ) != 0 ) + { + global_work_size++; + } + + // Make sure global_work_size is an even multiple of local_work_size + if( ( global_work_size % local_work_size ) != 0 ) + { + global_work_size += + local_work_size - + ( global_work_size % local_work_size ); + } + + // Execute kernel + errorCode = dispatch().clEnqueueNDRangeKernel( + commandQueue, + pOverrides->Kernel_CopyBufferUInts, + 1, + NULL, + &global_work_size, + &local_work_size, + numEventsInWaitList, + eventWaitList, + event ); + } + } + } + else + { + if( errorCode == CL_SUCCESS ) + { + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferBytes, + 0, + sizeof( srcBuffer ), + &srcBuffer ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferBytes, + 1, + sizeof( dstBuffer ), + &dstBuffer ); + + cl_uint uiSrcOffset = (cl_uint)( srcOffset ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferBytes, + 2, + sizeof( uiSrcOffset ), + &uiSrcOffset ); + + cl_uint uiDstOffset = (cl_uint)( dstOffset ); + errorCode |= dispatch().clSetKernelArg( + 
pOverrides->Kernel_CopyBufferBytes, + 3, + sizeof( uiDstOffset ), + &uiDstOffset ); + + cl_uint uiBytesToCopy = (cl_uint)( bytesToCopy ); + errorCode |= dispatch().clSetKernelArg( + pOverrides->Kernel_CopyBufferBytes, + 4, + sizeof( uiBytesToCopy ), + &uiBytesToCopy ); + + if( errorCode == CL_SUCCESS ) + { + size_t global_work_size = bytesToCopy; + size_t local_work_size = 32; + + // Make sure global_work_size is an even multiple of local_work_size + if( ( global_work_size % local_work_size ) != 0 ) + { + global_work_size += + local_work_size - + ( global_work_size % local_work_size ); + } + + // Execute kernel + errorCode = dispatch().clEnqueueNDRangeKernel( + commandQueue, + pOverrides->Kernel_CopyBufferBytes, + 1, + NULL, + &global_work_size, + &local_work_size, + numEventsInWaitList, + eventWaitList, + event ); + } + } + } + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::ReadImage( + cl_command_queue commandQueue, + cl_mem srcImage, + cl_bool blockingRead, + const size_t* srcOrigin, + const size_t* region, + size_t dstRowPitch, + size_t dstSlicePitch, + void* dstPtr, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + // Basic error checking, to avoid possible null pointer dereferences. + if( errorCode == CL_SUCCESS ) + { + if( srcOrigin == NULL || region == NULL ) + { + errorCode = CL_INVALID_VALUE; + } + } + + cl_context context = NULL; + + // Get the context for this command queue. + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + commandQueue, + CL_QUEUE_CONTEXT, + sizeof( context ), + &context, + NULL ); + } + + // Create a USE_HOST_PTR image for the passed-in pointer. + // The size of the buffer will be at least as big as the region to read. + + // We need to know what type of image to create. 
+ // If region[2] is 1, then a 2D image will suffice, + // otherwise we'll need to create a 3D image. + + // The image will have the same image format as srcImage. + + cl_image_format srcFormat = { 0 }; + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetImageInfo( + srcImage, + CL_IMAGE_FORMAT, + sizeof( srcFormat ), + &srcFormat, + NULL ); + } + + cl_mem dstImage = NULL; + + if( errorCode == CL_SUCCESS ) + { + if( region[2] == 1 ) + { + // 2D image + dstImage = dispatch().clCreateImage2D( + context, + CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, + &srcFormat, + region[0], + region[1], + dstRowPitch, + dstPtr, + &errorCode ); + } + else + { + // 3D image + dstImage = dispatch().clCreateImage3D( + context, + CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, + &srcFormat, + region[0], + region[1], + region[2], + dstRowPitch, + dstSlicePitch, + dstPtr, + &errorCode ); + } + } + + size_t dstOrigin[3] = { 0, 0, 0 }; + + if( errorCode == CL_SUCCESS ) + { + errorCode = CopyImageHelper( + context, + commandQueue, + srcImage, + dstImage, + srcOrigin, + dstOrigin, + region, + numEventsInWaitList, + eventWaitList, + event ); + } + + // Technically, we need to map and unmap the destination image + // to transfer data to our pointer. This will also handle + // blockingRead. 
+ if( errorCode == CL_SUCCESS ) + { + void* mappedPointer = dispatch().clEnqueueMapImage( + commandQueue, + dstImage, + blockingRead, + CL_MAP_READ, + dstOrigin, + region, + &dstRowPitch, + &dstSlicePitch, + 0, + NULL, + NULL, + &errorCode ); + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clEnqueueUnmapMemObject( + commandQueue, + dstImage, + mappedPointer, + 0, + NULL, + NULL ); + } + } + + dispatch().clReleaseMemObject( dstImage ); + + m_OS.LeaveCriticalSection(); + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::WriteImage( + cl_command_queue commandQueue, + cl_mem dstImage, + cl_bool blockingWrite, + const size_t* dstOrigin, + const size_t* region, + size_t srcRowPitch, + size_t srcSlicePitch, + const void* srcPtr, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + // Basic error checking, to avoid possible null pointer dereferences. + if( errorCode == CL_SUCCESS ) + { + if( dstOrigin == NULL || region == NULL ) + { + errorCode = CL_INVALID_VALUE; + } + } + + cl_context context = NULL; + + // Get the context for this command queue. + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + commandQueue, + CL_QUEUE_CONTEXT, + sizeof( context ), + &context, + NULL ); + } + + // Create a USE_HOST_PTR image for the passed-in pointer. + // The size of the buffer will be at least as big as the region to read. + + // We need to know what type of image to create. + // If region[2] is 1, then a 2D image will suffice, + // otherwise we'll need to create a 3D image. + + // The image will have the same image format as srcImage. 
+ + cl_image_format dstFormat = { 0 }; + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetImageInfo( + dstImage, + CL_IMAGE_FORMAT, + sizeof( dstFormat ), + &dstFormat, + NULL ); + } + + cl_mem srcImage = NULL; + + if( errorCode == CL_SUCCESS ) + { + if( region[2] == 1 ) + { + // 2D image + srcImage = dispatch().clCreateImage2D( + context, + CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + &dstFormat, + region[0], + region[1], + srcRowPitch, + (void*)srcPtr, + &errorCode ); + } + else + { + // 3D image + srcImage = dispatch().clCreateImage3D( + context, + CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + &dstFormat, + region[0], + region[1], + region[2], + srcRowPitch, + srcSlicePitch, + (void*)srcPtr, + &errorCode ); + } + } + + size_t srcOrigin[3] = { 0, 0, 0 }; + + if( errorCode == CL_SUCCESS ) + { + errorCode = CopyImageHelper( + context, + commandQueue, + srcImage, + dstImage, + srcOrigin, + dstOrigin, + region, + numEventsInWaitList, + eventWaitList, + event ); + } + + if( errorCode == CL_SUCCESS ) + { + if( blockingWrite ) + { + errorCode = dispatch().clFinish( + commandQueue ); + } + } + + dispatch().clReleaseMemObject( srcImage ); + + m_OS.LeaveCriticalSection(); + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::CopyImage( + cl_command_queue commandQueue, + cl_mem srcImage, + cl_mem dstImage, + const size_t* srcOrigin, + const size_t* dstOrigin, + const size_t* region, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + cl_context context = NULL; + + // Get the context for this command queue. 
+ if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + commandQueue, + CL_QUEUE_CONTEXT, + sizeof( context ), + &context, + NULL ); + } + + if( errorCode == CL_SUCCESS ) + { + errorCode = CopyImageHelper( + context, + commandQueue, + srcImage, + dstImage, + srcOrigin, + dstOrigin, + region, + numEventsInWaitList, + eventWaitList, + event ); + } + + m_OS.LeaveCriticalSection(); + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::CopyImageHelper( + cl_context context, + cl_command_queue commandQueue, + cl_mem srcImage, + cl_mem dstImage, + const size_t* srcOrigin, + const size_t* dstOrigin, + const size_t* region, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ) +{ + // This function assumes that it is being called from within a critical + // section, so it does not enter the critical section again. + + cl_int errorCode = CL_SUCCESS; + + SPrecompiledKernelOverrides* pOverrides = NULL; + + // Get the overrides for this context. + if( errorCode == CL_SUCCESS ) + { + pOverrides = m_PrecompiledKernelOverridesMap[ context ]; + if( pOverrides == NULL ) + { + errorCode = CL_INVALID_VALUE; + } + } + + // Figure out the type of the source image. + cl_mem_object_type srcType = CL_MEM_OBJECT_BUFFER; + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetMemObjectInfo( + srcImage, + CL_MEM_TYPE, + sizeof( srcType ), + &srcType, + NULL ); + } + + // Figure out the type of the destination image. + cl_mem_object_type dstType = CL_MEM_OBJECT_BUFFER; + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetMemObjectInfo( + srcImage, + CL_MEM_TYPE, + sizeof( dstType ), + &dstType, + NULL ); + } + + // Figure out the format of the source image. 
+ cl_image_format srcFormat = { 0 }; + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetImageInfo( + srcImage, + CL_IMAGE_FORMAT, + sizeof( srcFormat ), + &srcFormat, + NULL ); + } + + // Figure out the format of the destination image. + cl_image_format dstFormat = { 0 }; + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetImageInfo( + dstImage, + CL_IMAGE_FORMAT, + sizeof( dstFormat ), + &dstFormat, + NULL ); + } + + // Image formats must match. + if( errorCode == CL_SUCCESS ) + { + if( ( srcFormat.image_channel_data_type != dstFormat.image_channel_data_type ) || + ( srcFormat.image_channel_order != dstFormat.image_channel_order ) ) + { + errorCode = CL_IMAGE_FORMAT_MISMATCH; + } + switch( srcType ) + { + case CL_MEM_OBJECT_IMAGE2D: + if( ( srcOrigin[2] != 0 ) || + ( region[2] != 1 ) ) + { + errorCode = CL_INVALID_VALUE; + } + break; + case CL_MEM_OBJECT_IMAGE3D: + break; + default: + errorCode = CL_INVALID_OPERATION; + break; + } + switch( dstType ) + { + case CL_MEM_OBJECT_IMAGE2D: + if( ( dstOrigin[2] != 0 ) || + ( region[2] != 1 ) ) + { + errorCode = CL_INVALID_VALUE; + } + break; + case CL_MEM_OBJECT_IMAGE3D: + break; + default: + errorCode = CL_INVALID_OPERATION; + break; + } + } + + cl_kernel kernel = NULL; + if( errorCode == CL_SUCCESS ) + { + switch( srcFormat.image_channel_data_type ) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_HALF_FLOAT: + case CL_FLOAT: + // "Float" Images + switch( srcType ) + { + case CL_MEM_OBJECT_IMAGE2D: + switch( dstType ) + { + case CL_MEM_OBJECT_IMAGE2D: + // 2D to 2D + kernel = pOverrides->Kernel_CopyImage2Dto2DFloat; + break; + default: + CLI_ASSERT( 0 ); + errorCode = CL_INVALID_OPERATION; + break; + } + break; + default: + CLI_ASSERT( 0 ); + errorCode = CL_INVALID_OPERATION; + break; + } + break; + + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + // "Int" Images + switch( srcType ) + { + case 
CL_MEM_OBJECT_IMAGE2D: + switch( dstType ) + { + case CL_MEM_OBJECT_IMAGE2D: + // 2D to 2D + kernel = pOverrides->Kernel_CopyImage2Dto2DInt; + break; + default: + CLI_ASSERT( 0 ); + errorCode = CL_INVALID_OPERATION; + break; + } + break; + default: + CLI_ASSERT( 0 ); + errorCode = CL_INVALID_OPERATION; + break; + } + break; + + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + // "UInt" Images + switch( srcType ) + { + case CL_MEM_OBJECT_IMAGE2D: + switch( dstType ) + { + case CL_MEM_OBJECT_IMAGE2D: + // 2D to 2D + kernel = pOverrides->Kernel_CopyImage2Dto2DUInt; + break; + default: + CLI_ASSERT( 0 ); + errorCode = CL_INVALID_OPERATION; + break; + } + break; + default: + CLI_ASSERT( 0 ); + errorCode = CL_INVALID_OPERATION; + break; + } + break; + + default: + CLI_ASSERT( 0 ); + errorCode = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + } + } + + if( errorCode == CL_SUCCESS ) + { + errorCode |= dispatch().clSetKernelArg( + kernel, + 0, + sizeof( srcImage ), + &srcImage ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 1, + sizeof( dstImage ), + &dstImage ); + + cl_uint uiArg = (cl_uint)( srcOrigin[0] ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 2, + sizeof( uiArg ), + &uiArg ); + uiArg = (cl_uint)( srcOrigin[1] ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 3, + sizeof( uiArg ), + &uiArg ); + uiArg = (cl_uint)( srcOrigin[2] ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 4, + sizeof( uiArg ), + &uiArg ); + + uiArg = (cl_uint)( dstOrigin[0] ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 5, + sizeof( uiArg ), + &uiArg ); + uiArg = (cl_uint)( dstOrigin[1] ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 6, + sizeof( uiArg ), + &uiArg ); + uiArg = (cl_uint)( dstOrigin[2] ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 7, + sizeof( uiArg ), + &uiArg ); + + uiArg = (cl_uint)( region[0] ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 8, + sizeof( uiArg ), + &uiArg ); + 
uiArg = (cl_uint)( region[1] ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 9, + sizeof( uiArg ), + &uiArg ); + uiArg = (cl_uint)( region[2] ); + errorCode |= dispatch().clSetKernelArg( + kernel, + 10, + sizeof( uiArg ), + &uiArg ); + + if( errorCode == CL_SUCCESS ) + { + size_t global_work_size[3] = + { + region[0], + region[1], + region[2] + }; + size_t local_work_size[3] = + { + 32, + 1, + 1 + }; + + // Make sure global_work_size is an even multiple of local_work_size + if( ( global_work_size[0] % local_work_size[0] ) != 0 ) + { + global_work_size[0] += + local_work_size[0] - + ( global_work_size[0] % local_work_size[0] ); + } + CLI_ASSERT( local_work_size[1] == 1 ); + CLI_ASSERT( local_work_size[2] == 1 ); + + // Execute kernel + errorCode = dispatch().clEnqueueNDRangeKernel( + commandQueue, + kernel, + 3, + NULL, + global_work_size, + local_work_size, + numEventsInWaitList, + eventWaitList, + event ); + } + } + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_program CLIntercept::createProgramWithBuiltinKernels( + cl_context context ) +{ + m_OS.EnterCriticalSection(); + + cl_program program = NULL; + + SBuiltinKernelOverrides* pOverrides = m_BuiltinKernelOverridesMap[ context ]; + if( pOverrides ) + { + program = pOverrides->Program; + dispatch().clRetainProgram( program ); + } + + m_OS.LeaveCriticalSection(); + return program; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_kernel CLIntercept::createBuiltinKernel( + cl_program program, + const std::string& kernel_name, + cl_int* errcode_ret ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + cl_context context = NULL; + cl_kernel kernel = NULL; + + // Get the context for this program. 
+ if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetProgramInfo( + program, + CL_PROGRAM_CONTEXT, + sizeof( context ), + &context, + NULL ); + } + + SBuiltinKernelOverrides* pOverrides = NULL; + + // Get the overrides for this context. + if( errorCode == CL_SUCCESS ) + { + pOverrides = m_BuiltinKernelOverridesMap[ context ]; + if( pOverrides != NULL ) + { + if( kernel_name == "block_motion_estimate_intel" ) + { + kernel = pOverrides->Kernel_block_motion_estimate_intel; + dispatch().clRetainKernel( kernel ); + if( errcode_ret ) + { + errcode_ret[0] = CL_SUCCESS; + } + } + } + } + + m_OS.LeaveCriticalSection(); + + return kernel; +} + +/////////////////////////////////////////////////////////////////////////////// +// +cl_int CLIntercept::NDRangeBuiltinKernel( + cl_command_queue commandQueue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + cl_context context = NULL; + + // Get the context for this command queue. + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + commandQueue, + CL_QUEUE_CONTEXT, + sizeof( context ), + &context, + NULL ); + } + + SBuiltinKernelOverrides* pOverrides = NULL; + + // Get the overrides for this context. + if( errorCode == CL_SUCCESS ) + { + pOverrides = m_BuiltinKernelOverridesMap[ context ]; + if( pOverrides == NULL ) + { + errorCode = CL_INVALID_VALUE; + } + } + + // See if this kernel is one of our overridden builtin kernels. 
+ if( errorCode == CL_SUCCESS ) + { + if( kernel == pOverrides->Kernel_block_motion_estimate_intel ) + { + if( ( work_dim == 2 ) && + ( global_work_size != NULL ) && + ( local_work_size == NULL ) ) + { + const size_t BLOCK_SIZE = 16; + const size_t w = ( global_work_size[0] + BLOCK_SIZE - 1 ) / BLOCK_SIZE; + const size_t h = ( global_work_size[1] + BLOCK_SIZE - 1 ) / BLOCK_SIZE; +#if 0 + const size_t new_global_work_size[] = { w * BLOCK_SIZE, h }; + const size_t new_local_work_size[] = { BLOCK_SIZE, 1 }; + + int iterations = 1; +#else + const size_t new_global_work_size[] = { w * BLOCK_SIZE, 1 }; + const size_t new_local_work_size[] = { BLOCK_SIZE, 1 }; + + int iterations = (int)h; +#endif + errorCode = dispatch().clSetKernelArg( + kernel, + 6, + sizeof( iterations ), + &iterations ); + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clEnqueueNDRangeKernel( + commandQueue, + kernel, + 2, + global_work_offset, + new_global_work_size, + new_local_work_size, + num_events_in_wait_list, + event_wait_list, + event ); + } + } + } + else + { + errorCode = CL_INVALID_VALUE; + } + } + + m_OS.LeaveCriticalSection(); + + return errorCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::SIMDSurveyCreateProgramFromSource( + const cl_program program, + cl_context context, + cl_uint count, + const char** strings, + const size_t* lengths ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + SSIMDSurveyProgram* pSIMDSurveyProgram = + m_SIMDSurveyProgramMap[ program ]; + if( pSIMDSurveyProgram ) + { + errorCode = dispatch().clReleaseProgram( pSIMDSurveyProgram->SIMD8Program ); + errorCode = dispatch().clReleaseProgram( pSIMDSurveyProgram->SIMD16Program ); + errorCode = dispatch().clReleaseProgram( pSIMDSurveyProgram->SIMD32Program ); + + delete pSIMDSurveyProgram; + pSIMDSurveyProgram = NULL; + + m_SIMDSurveyProgramMap[ program ] = NULL; + } + + pSIMDSurveyProgram = new SSIMDSurveyProgram; 
+ if( pSIMDSurveyProgram ) + { + log( "SIMD Survey: CreateProgramFromSource\n" ); + + pSIMDSurveyProgram->SIMD8Program = dispatch().clCreateProgramWithSource( + context, + count, + strings, + lengths, + &errorCode ); + pSIMDSurveyProgram->SIMD16Program = dispatch().clCreateProgramWithSource( + context, + count, + strings, + lengths, + &errorCode ); + pSIMDSurveyProgram->SIMD32Program = dispatch().clCreateProgramWithSource( + context, + count, + strings, + lengths, + &errorCode ); + + m_SIMDSurveyProgramMap[ program ] = pSIMDSurveyProgram; + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::SIMDSurveyBuildProgram( + const cl_program program, + cl_uint numDevices, + const cl_device_id* deviceList, + const char* options ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + SSIMDSurveyProgram* pSIMDSurveyProgram = + m_SIMDSurveyProgramMap[ program ]; + if( pSIMDSurveyProgram ) + { + // Pre-pend the required subgroup size build option. This assumes that + // if the required subgroup size options string is already in the program + // options string then the later option will have precedence. + std::string userOptions( options ? 
options : "" ); + std::string simd8Options = config().SIMDSurveySIMD8Option + " " + userOptions; + std::string simd16Options = config().SIMDSurveySIMD16Option + " " + userOptions; + std::string simd32Options = config().SIMDSurveySIMD32Option + " " + userOptions; + + log( "SIMD Survey: Building SIMD8 kernel with options: " + simd8Options + "\n" ); + errorCode |= dispatch().clBuildProgram( + pSIMDSurveyProgram->SIMD8Program, + numDevices, + deviceList, + simd8Options.c_str(), + NULL, + NULL ); + log( "SIMD Survey: Building SIMD16 kernel with options: " + simd16Options + "\n" ); + errorCode |= dispatch().clBuildProgram( + pSIMDSurveyProgram->SIMD16Program, + numDevices, + deviceList, + simd16Options.c_str(), + NULL, + NULL ); + log( "SIMD Survey: Building SIMD32 kernel with options: " + simd16Options + "\n" ); + errorCode |= dispatch().clBuildProgram( + pSIMDSurveyProgram->SIMD32Program, + numDevices, + deviceList, + simd32Options.c_str(), + NULL, + NULL ); + if( errorCode != CL_SUCCESS ) + { + log( "SIMD Survey: Building done (with errors).\n" ); + } + else + { + log( "SIMD Survey: Building done.\n" ); + } + } + else + { + logf( "SIMD Survey: BuildProgram: Couldn't find info for program %p!?!?\n", + program ); + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::SIMDSurveyCreateKernel( + const cl_program program, + const cl_kernel kernel, + const std::string& kernelName ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + SSIMDSurveyKernel* pSIMDSurveyKernel = + m_SIMDSurveyKernelMap[ kernel ]; + if( pSIMDSurveyKernel ) + { + // I don't think this should happen, assuming we've cleaned up + // correctly when the kernel is released.... + CLI_ASSERT( 0 ); + + // Remove the parent kernel and each of the child kernels from the map. 
+ m_SIMDSurveyKernelMap.erase( kernel ); + + m_SIMDSurveyKernelMap.erase( pSIMDSurveyKernel->SIMD8Kernel ); + m_SIMDSurveyKernelMap.erase( pSIMDSurveyKernel->SIMD16Kernel ); + m_SIMDSurveyKernelMap.erase( pSIMDSurveyKernel->SIMD32Kernel ); + + errorCode = dispatch().clReleaseKernel( pSIMDSurveyKernel->SIMD8Kernel ); + errorCode = dispatch().clReleaseKernel( pSIMDSurveyKernel->SIMD16Kernel ); + errorCode = dispatch().clReleaseKernel( pSIMDSurveyKernel->SIMD32Kernel ); + + delete pSIMDSurveyKernel; + pSIMDSurveyKernel = NULL; + } + + SSIMDSurveyProgram* pSIMDSurveyProgram = + m_SIMDSurveyProgramMap[ program ]; + if( pSIMDSurveyProgram ) + { + pSIMDSurveyKernel = new SSIMDSurveyKernel; + if( pSIMDSurveyKernel ) + { + logf( "SIMD Survey: Creating kernels for %s\n", + kernelName.c_str() ); + + pSIMDSurveyKernel->SIMD8Kernel = dispatch().clCreateKernel( + pSIMDSurveyProgram->SIMD8Program, + kernelName.c_str(), + &errorCode ); + pSIMDSurveyKernel->SIMD16Kernel = dispatch().clCreateKernel( + pSIMDSurveyProgram->SIMD16Program, + kernelName.c_str(), + &errorCode ); + pSIMDSurveyKernel->SIMD32Kernel = dispatch().clCreateKernel( + pSIMDSurveyProgram->SIMD32Program, + kernelName.c_str(), + &errorCode ); + + pSIMDSurveyKernel->SIMD8ExecutionTimeNS = CL_ULONG_MAX; + pSIMDSurveyKernel->SIMD16ExecutionTimeNS = CL_ULONG_MAX; + pSIMDSurveyKernel->SIMD32ExecutionTimeNS = CL_ULONG_MAX; + + pSIMDSurveyKernel->ExecutionNumber = 0; + + // We'll install the same pointer into the map for the real + // parent kernel and for each of the child kernels compiled + // for specific SIMD sizes. The parent kernel is used to + // look up the kernel to execute, and the child kernels are + // used to aggregate the results. 
+ + m_SIMDSurveyKernelMap[ kernel ] = pSIMDSurveyKernel; + + m_SIMDSurveyKernelMap[ pSIMDSurveyKernel->SIMD8Kernel ] = pSIMDSurveyKernel; + m_SIMDSurveyKernelMap[ pSIMDSurveyKernel->SIMD16Kernel ] = pSIMDSurveyKernel; + m_SIMDSurveyKernelMap[ pSIMDSurveyKernel->SIMD32Kernel ] = pSIMDSurveyKernel; + + // Also, keep the kernel name map up-to-date. This is necessary to + // print the right kernel names in e.g. device timing reports. The + // other maps, such as the kernel arg map, don't need to know about + // child kernels, so we don't add anything for them here. + m_KernelNameMap[ pSIMDSurveyKernel->SIMD8Kernel ] = kernelName; + m_KernelNameMap[ pSIMDSurveyKernel->SIMD16Kernel ] = kernelName; + m_KernelNameMap[ pSIMDSurveyKernel->SIMD32Kernel ] = kernelName; + } + } + else + { + logf( "SIMD Survey: CreateKernel: Couldn't find info for program %p!?!?\n", + program ); + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::SIMDSurveySetKernelArg( + cl_kernel kernel, + cl_uint argIndex, + size_t argSize, + const void* argValue ) +{ + m_OS.EnterCriticalSection(); + + SSIMDSurveyKernel* pSIMDSurveyKernel = + m_SIMDSurveyKernelMap[ kernel ]; + if( pSIMDSurveyKernel ) + { + dispatch().clSetKernelArg( + pSIMDSurveyKernel->SIMD8Kernel, + argIndex, + argSize, + argValue ); + dispatch().clSetKernelArg( + pSIMDSurveyKernel->SIMD16Kernel, + argIndex, + argSize, + argValue ); + dispatch().clSetKernelArg( + pSIMDSurveyKernel->SIMD32Kernel, + argIndex, + argSize, + argValue ); + } + else + { + logf( "SIMD Survey: SerKernelArg: Couldn't find info for kernel %p!?!?\n", + kernel ); + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::SIMDSurveyNDRangeKernel( + cl_kernel& kernel ) +{ + m_OS.EnterCriticalSection(); + + SSIMDSurveyKernel* pSIMDSurveyKernel = + m_SIMDSurveyKernelMap[ kernel ]; + if( 
pSIMDSurveyKernel ) + { + const std::string& kernelName = m_KernelNameMap[ kernel ]; + + const uint32_t cWarmupIterations = config().SIMDSurveyWarmupIterations; + if( pSIMDSurveyKernel->ExecutionNumber >= cWarmupIterations ) + { + const uint32_t cSample = + pSIMDSurveyKernel->ExecutionNumber - cWarmupIterations; + + // This just tries the three kernels in order from + // 8 -> 16 -> 32, one time each. + // + // Other things we can try: + // - executing each kernel multiple times + // - different orders + switch( cSample ) + { + case 0: + if( pSIMDSurveyKernel->SIMD8Kernel ) + { + log( "SIMD Survey: NDRange: Sampling SIMD8 kernel for " + kernelName + "\n" ); + kernel = pSIMDSurveyKernel->SIMD8Kernel; + } + else + { + log( "SIMD Survey: NDRange: Skipping sample, no SIMD8 kernel exists for " + kernelName + ".\n" ); + } + break; + case 1: + if( pSIMDSurveyKernel->SIMD16Kernel ) + { + log( "SIMD Survey: NDRange: Sampling SIMD16 kernel for " + kernelName + "\n" ); + kernel = pSIMDSurveyKernel->SIMD16Kernel; + } + else + { + log( "SIMD Survey: NDRange: Skipping sample, no SIMD16 kernel exists for " + kernelName + ".\n" ); + } + break; + case 2: + if( pSIMDSurveyKernel->SIMD32Kernel ) + { + log( "SIMD Survey: NDRange: Sampling SIMD32 kernel for " + kernelName + "\n" ); + kernel = pSIMDSurveyKernel->SIMD32Kernel; + } + else + { + log( "SIMD Survey: NDRange: Skipping sample, no SIMD32 kernel exists for " + kernelName + ".\n" ); + } + break; + default: + if( pSIMDSurveyKernel->SIMD8ExecutionTimeNS != CL_ULONG_MAX || + pSIMDSurveyKernel->SIMD16ExecutionTimeNS != CL_ULONG_MAX || + pSIMDSurveyKernel->SIMD32ExecutionTimeNS != CL_ULONG_MAX ) + { + cl_ulong fastestTimeNS = CL_ULONG_MAX; + cl_uint fastestSIMD = 0; + if( pSIMDSurveyKernel->SIMD8ExecutionTimeNS < fastestTimeNS ) + { + fastestTimeNS = pSIMDSurveyKernel->SIMD8ExecutionTimeNS; + fastestSIMD = 8; + kernel = pSIMDSurveyKernel->SIMD8Kernel; + } + if( pSIMDSurveyKernel->SIMD16ExecutionTimeNS < fastestTimeNS ) + { + 
fastestTimeNS = pSIMDSurveyKernel->SIMD16ExecutionTimeNS; + fastestSIMD = 16; + kernel = pSIMDSurveyKernel->SIMD16Kernel; + } + if( pSIMDSurveyKernel->SIMD32ExecutionTimeNS < fastestTimeNS ) + { + fastestTimeNS = pSIMDSurveyKernel->SIMD32ExecutionTimeNS; + fastestSIMD = 32; + kernel = pSIMDSurveyKernel->SIMD32Kernel; + } + + logf( "SIMD Survey: NDRange: Picking SIMD%u kernel for %s: SIMD8 Time = %u, SIMD16 Time = %u, SIMD32 Time = %u\n", + fastestSIMD, + kernelName.c_str(), + (cl_uint)pSIMDSurveyKernel->SIMD8ExecutionTimeNS, + (cl_uint)pSIMDSurveyKernel->SIMD16ExecutionTimeNS, + (cl_uint)pSIMDSurveyKernel->SIMD32ExecutionTimeNS ); + } + else + { + log( "SIMD Survey: NDRange: No samples for kernel " + kernelName + " (yet?)\n" ); + } + break; + } + } + else + { + logf( "SIMD Survey: NDRange: Executing warmup iteration %d of %d for kernel %s\n", + pSIMDSurveyKernel->ExecutionNumber + 1, + cWarmupIterations, + kernelName.c_str() ); + } + + pSIMDSurveyKernel->ExecutionNumber++; + } + else + { + logf( "SIMD Survey NDRange: Couldn't find info for kernel %p!?!?\n", + kernel ); + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +#define CHECK_RETURN_EXTENSION_FUNCTION(funcname) \ +{ \ + if( func_name == #funcname ) \ + { \ + if( dispatch() . funcname == NULL ) \ + { \ + void *func = NULL; \ + if( platform && \ + dispatch().clGetExtensionFunctionAddressForPlatform ) \ + { \ + func = dispatch().clGetExtensionFunctionAddressForPlatform( \ + platform, \ + #funcname ); \ + } else if( dispatch().clGetExtensionFunctionAddress ) \ + { \ + func = dispatch().clGetExtensionFunctionAddress(#funcname); \ + } \ + void** pfunc = (void**)( &m_Dispatch . funcname ); \ + *pfunc = func; \ + } \ + if( dispatch() . 
funcname ) \ + { \ + return (void*)( funcname ); \ + } \ + } \ +} + +/////////////////////////////////////////////////////////////////////////////// +// +void* CLIntercept::getExtensionFunctionAddress( + cl_platform_id platform, + const std::string& func_name ) const +{ + // KHR Extensions + + // cl_khr_gl_sharing + // Even though all of these functions except for clGetGLContextInfoKHR() + // are exported from the ICD DLL, still call CHECK_RETURN_EXTENSION_FUNCTION + // to handle the case where an intercepted DLL supports the extension but + // does not export the entry point. This will probably never happen in + // practice, but better safe than sorry. +#if defined(_WIN32) || defined(__linux__) + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromGLBuffer ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromGLTexture ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromGLTexture2D ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromGLTexture3D ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromGLRenderbuffer ); + CHECK_RETURN_EXTENSION_FUNCTION( clGetGLObjectInfo ); + CHECK_RETURN_EXTENSION_FUNCTION( clGetGLTextureInfo ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueAcquireGLObjects ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueReleaseGLObjects ); +#endif + CHECK_RETURN_EXTENSION_FUNCTION( clGetGLContextInfoKHR ); + // cl_khr_gl_event + CHECK_RETURN_EXTENSION_FUNCTION( clCreateEventFromGLsyncKHR ); +#if defined(_WIN32) + // cl_khr_d3d10_sharing + CHECK_RETURN_EXTENSION_FUNCTION( clGetDeviceIDsFromD3D10KHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromD3D10BufferKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromD3D10Texture2DKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromD3D10Texture3DKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueAcquireD3D10ObjectsKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueReleaseD3D10ObjectsKHR ); + // cl_khr_d3d11_sharing + CHECK_RETURN_EXTENSION_FUNCTION( clGetDeviceIDsFromD3D11KHR ); + CHECK_RETURN_EXTENSION_FUNCTION( 
clCreateFromD3D11BufferKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromD3D11Texture2DKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromD3D11Texture3DKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueAcquireD3D11ObjectsKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueReleaseD3D11ObjectsKHR ); + // cl_khr_dx9_media_sharing + CHECK_RETURN_EXTENSION_FUNCTION( clGetDeviceIDsFromDX9MediaAdapterKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromDX9MediaSurfaceKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueAcquireDX9MediaSurfacesKHR ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueReleaseDX9MediaSurfacesKHR ); +#endif + // cl_khr_il_program + CHECK_RETURN_EXTENSION_FUNCTION( clCreateProgramWithILKHR ); + // cl_khr_subgroups + CHECK_RETURN_EXTENSION_FUNCTION( clGetKernelSubGroupInfoKHR ); + // cl_khr_create_command_queue + CHECK_RETURN_EXTENSION_FUNCTION( clCreateCommandQueueWithPropertiesKHR ); + + // Intel Extensions + +#if defined(_WIN32) + // cl_intel_dx9_media_sharing + CHECK_RETURN_EXTENSION_FUNCTION( clGetDeviceIDsFromDX9INTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromDX9MediaSurfaceINTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueAcquireDX9ObjectsINTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueReleaseDX9ObjectsINTEL ); +#endif + + // Unofficial MDAPI extension: + CHECK_RETURN_EXTENSION_FUNCTION( clCreatePerfCountersCommandQueueINTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( clSetPerformanceConfigurationINTEL ); + + // cl_intel_accelerator + CHECK_RETURN_EXTENSION_FUNCTION( clCreateAcceleratorINTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( clGetAcceleratorInfoINTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( clRetainAcceleratorINTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( clReleaseAcceleratorINTEL ); + + // cl_intel_va_api_media_sharing + CHECK_RETURN_EXTENSION_FUNCTION( clGetDeviceIDsFromVA_APIMediaAdapterINTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( clCreateFromVA_APIMediaSurfaceINTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( 
clEnqueueAcquireVA_APIMediaSurfacesINTEL ); + CHECK_RETURN_EXTENSION_FUNCTION( clEnqueueReleaseVA_APIMediaSurfacesINTEL ); + + return NULL; +} + +/////////////////////////////////////////////////////////////////////////////// +// +// This function assumes that CLIntercept already has entered its +// critical section. If it hasn't, bad things could happen. +void CLIntercept::log( const std::string& s ) +{ + std::string logString( m_Config.LogIndent, ' ' ); + logString += s; + if( m_Config.LogToFile ) + { + m_InterceptLog << logString; + m_InterceptLog.flush(); + } + if( m_Config.LogToDebugger ) + { + OS().OutputDebugString( logString ); + } + + if( ( m_Config.LogToFile == false ) && + ( m_Config.LogToDebugger == false ) ) + { + std::cerr << logString; + } +} +void CLIntercept::logf( const char* formatStr, ... ) +{ + va_list args; + va_start( args, formatStr ); + + char temp[ CLI_MAX_STRING_SIZE ] = ""; + int size = CLI_VSPRINTF( temp, CLI_MAX_STRING_SIZE, formatStr, args ); + if( size >= 0 && size < CLI_MAX_STRING_SIZE ) + { + log( std::string( temp ) ); + } + else + { + log( std::string( "too long" ) ); + } + + va_end( args ); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::logPlatformInfo( cl_platform_id platform ) +{ + cl_int errorCode = CL_SUCCESS; + + char* platformName = NULL; + char* platformVendor = NULL; + char* platformVersion = NULL; + char* platformProfile = NULL; + char* platformExtensions = NULL; + + errorCode |= allocateAndGetPlatformInfoString( + platform, + CL_PLATFORM_NAME, + platformName ); + errorCode |= allocateAndGetPlatformInfoString( + platform, + CL_PLATFORM_VENDOR, + platformVendor ); + errorCode |= allocateAndGetPlatformInfoString( + platform, + CL_PLATFORM_VERSION, + platformVersion ); + errorCode |= allocateAndGetPlatformInfoString( + platform, + CL_PLATFORM_PROFILE, + platformProfile ); + errorCode |= allocateAndGetPlatformInfoString( + platform, + CL_PLATFORM_EXTENSIONS, + 
platformExtensions ); + + if( errorCode == CL_SUCCESS ) + { + logf( "\tName: %s\n", platformName ); + logf( "\tVendor: %s\n", platformVendor ); + logf( "\tDriver Version: %s\n", platformVersion ); + logf( "\tProfile: %s\n", platformProfile ); + + int numberOfExtensions = 0; + logf( "\tExtensions:\n" ); + if( platformExtensions ) + { + char* extension = NULL; + char* nextExtension = NULL; + extension = CLI_STRTOK( platformExtensions, " ", &nextExtension ); + while( extension != NULL ) + { + numberOfExtensions++; + logf( "\t\t%s\n", extension ); + extension = CLI_STRTOK( NULL, " ", &nextExtension ); + } + } + logf( "\t\t%d Platform Extensions Found\n", numberOfExtensions ); + } + else + { + log( "\tError getting platform info!\n" ); + } + + delete [] platformName; + delete [] platformVendor; + delete [] platformVersion; + delete [] platformProfile; + delete [] platformExtensions; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::logDeviceInfo( cl_device_id device ) +{ + cl_int errorCode = CL_SUCCESS; + + cl_device_type deviceType; + char* deviceName = NULL; + char* deviceVendor = NULL; + char* deviceVersion = NULL; + char* driverVersion = NULL; + char* deviceExtensions = NULL; + + errorCode |= dispatch().clGetDeviceInfo( + device, + CL_DEVICE_TYPE, + sizeof( deviceType ), + &deviceType, + NULL ); + errorCode |= allocateAndGetDeviceInfoString( + device, + CL_DEVICE_NAME, + deviceName ); + errorCode |= allocateAndGetDeviceInfoString( + device, + CL_DEVICE_VENDOR, + deviceVendor ); + errorCode |= allocateAndGetDeviceInfoString( + device, + CL_DEVICE_VERSION, + deviceVersion ); + errorCode |= allocateAndGetDeviceInfoString( + device, + CL_DRIVER_VERSION, + driverVersion ); + errorCode |= allocateAndGetDeviceInfoString( + device, + CL_DEVICE_EXTENSIONS, + deviceExtensions ); + + if( errorCode == CL_SUCCESS ) + { + logf( "\tName: %s\n", deviceName ); + logf( "\tVendor: %s\n", deviceVendor ); + logf( "\tVersion: 
%s\n", deviceVersion ); + logf( "\tDriver Version: %s\n", driverVersion ); + logf( "\tType: %s\n", enumName().name_device_type( deviceType ).c_str() ); + + int numberOfExtensions = 0; + logf( "\tExtensions:\n" ); + if( deviceExtensions ) + { + char* extension = NULL; + char* nextExtension = NULL; + extension = CLI_STRTOK( deviceExtensions, " ", &nextExtension ); + while( extension != NULL ) + { + numberOfExtensions++; + logf( "\t\t%s\n", extension ); + extension = CLI_STRTOK( NULL, " ", &nextExtension ); + } + } + logf( "\t\t%d Device Extensions Found\n", numberOfExtensions ); + } + else + { + log( "Error getting device info!\n" ); + } + + delete [] deviceName; + delete [] deviceVendor; + delete [] deviceVersion; + delete [] driverVersion; + delete [] deviceExtensions; +} + +/////////////////////////////////////////////////////////////////////////////// +// +#if defined(_WIN32) || defined(__linux__) +#define INIT_EXPORTED_FUNC(funcname) \ +{ \ + void* func = OS().GetFunctionPointer(m_OpenCLLibraryHandle, #funcname); \ + if (func == NULL) \ + { \ + log( std::string("Couldn't get exported function pointer to: ") + #funcname + "\n" );\ + success = false; \ + } \ + else \ + { \ + void** pfunc = (void**)( &m_Dispatch . 
funcname ); \ + *pfunc = func; \ + } \ +} +bool CLIntercept::initDispatch( const std::string& dllName ) +{ + bool success = true; + + if( success ) + { + m_OpenCLLibraryHandle = OS().LoadLibrary( dllName.c_str() ); + if( m_OpenCLLibraryHandle == NULL ) + { + log( std::string("Couldn't load library from: ") + dllName + "\n"); + success = false; + } + } + + if( success ) + { + INIT_EXPORTED_FUNC(clGetPlatformIDs); + INIT_EXPORTED_FUNC(clGetPlatformInfo); + INIT_EXPORTED_FUNC(clGetDeviceIDs); + INIT_EXPORTED_FUNC(clGetDeviceInfo); + INIT_EXPORTED_FUNC(clCreateContext); + INIT_EXPORTED_FUNC(clCreateContextFromType); + INIT_EXPORTED_FUNC(clRetainContext); + INIT_EXPORTED_FUNC(clReleaseContext); + INIT_EXPORTED_FUNC(clGetContextInfo); + INIT_EXPORTED_FUNC(clCreateCommandQueue); + INIT_EXPORTED_FUNC(clRetainCommandQueue); + INIT_EXPORTED_FUNC(clReleaseCommandQueue); + INIT_EXPORTED_FUNC(clGetCommandQueueInfo); + INIT_EXPORTED_FUNC(clSetCommandQueueProperty); + INIT_EXPORTED_FUNC(clCreateBuffer); + INIT_EXPORTED_FUNC(clCreateImage2D); + INIT_EXPORTED_FUNC(clCreateImage3D); + INIT_EXPORTED_FUNC(clRetainMemObject); + INIT_EXPORTED_FUNC(clReleaseMemObject); + INIT_EXPORTED_FUNC(clGetSupportedImageFormats); + INIT_EXPORTED_FUNC(clGetMemObjectInfo); + INIT_EXPORTED_FUNC(clGetImageInfo); + INIT_EXPORTED_FUNC(clCreateSampler); + INIT_EXPORTED_FUNC(clRetainSampler); + INIT_EXPORTED_FUNC(clReleaseSampler); + INIT_EXPORTED_FUNC(clGetSamplerInfo); + INIT_EXPORTED_FUNC(clCreateProgramWithSource); + INIT_EXPORTED_FUNC(clCreateProgramWithBinary); + INIT_EXPORTED_FUNC(clRetainProgram); + INIT_EXPORTED_FUNC(clReleaseProgram); + INIT_EXPORTED_FUNC(clBuildProgram); + INIT_EXPORTED_FUNC(clUnloadCompiler); + INIT_EXPORTED_FUNC(clGetProgramInfo); + INIT_EXPORTED_FUNC(clGetProgramBuildInfo); + INIT_EXPORTED_FUNC(clCreateKernel); + INIT_EXPORTED_FUNC(clCreateKernelsInProgram); + INIT_EXPORTED_FUNC(clRetainKernel); + INIT_EXPORTED_FUNC(clReleaseKernel); + INIT_EXPORTED_FUNC(clSetKernelArg); + 
INIT_EXPORTED_FUNC(clGetKernelInfo); + INIT_EXPORTED_FUNC(clGetKernelWorkGroupInfo); + INIT_EXPORTED_FUNC(clWaitForEvents); + INIT_EXPORTED_FUNC(clGetEventInfo); + INIT_EXPORTED_FUNC(clRetainEvent); + INIT_EXPORTED_FUNC(clReleaseEvent); + INIT_EXPORTED_FUNC(clGetEventProfilingInfo); + INIT_EXPORTED_FUNC(clFlush); + INIT_EXPORTED_FUNC(clFinish); + INIT_EXPORTED_FUNC(clEnqueueReadBuffer); + INIT_EXPORTED_FUNC(clEnqueueWriteBuffer); + INIT_EXPORTED_FUNC(clEnqueueCopyBuffer); + INIT_EXPORTED_FUNC(clEnqueueReadImage); + INIT_EXPORTED_FUNC(clEnqueueWriteImage); + INIT_EXPORTED_FUNC(clEnqueueCopyImage); + INIT_EXPORTED_FUNC(clEnqueueCopyImageToBuffer); + INIT_EXPORTED_FUNC(clEnqueueCopyBufferToImage); + INIT_EXPORTED_FUNC(clEnqueueMapBuffer); + INIT_EXPORTED_FUNC(clEnqueueMapImage); + INIT_EXPORTED_FUNC(clEnqueueUnmapMemObject); + INIT_EXPORTED_FUNC(clEnqueueNDRangeKernel); + INIT_EXPORTED_FUNC(clEnqueueTask); + INIT_EXPORTED_FUNC(clEnqueueNativeKernel); + INIT_EXPORTED_FUNC(clEnqueueMarker); + INIT_EXPORTED_FUNC(clEnqueueWaitForEvents); + INIT_EXPORTED_FUNC(clEnqueueBarrier); + + bool savedSuccess = success; + + // Optional features? 
+ INIT_EXPORTED_FUNC(clGetExtensionFunctionAddress); + INIT_EXPORTED_FUNC(clGetExtensionFunctionAddressForPlatform); + + // OpenCL 1.1 Entry Points (optional) + INIT_EXPORTED_FUNC(clCreateSubBuffer); + INIT_EXPORTED_FUNC(clSetMemObjectDestructorCallback); + INIT_EXPORTED_FUNC(clCreateUserEvent); + INIT_EXPORTED_FUNC(clSetUserEventStatus); + INIT_EXPORTED_FUNC(clSetEventCallback); + INIT_EXPORTED_FUNC(clEnqueueReadBufferRect); + INIT_EXPORTED_FUNC(clEnqueueWriteBufferRect); + INIT_EXPORTED_FUNC(clEnqueueCopyBufferRect); + + // OpenCL 1.2 Entry Points (optional) + INIT_EXPORTED_FUNC(clCompileProgram); + INIT_EXPORTED_FUNC(clCreateFromGLTexture); + INIT_EXPORTED_FUNC(clCreateImage); + INIT_EXPORTED_FUNC(clCreateProgramWithBuiltInKernels); + INIT_EXPORTED_FUNC(clCreateSubDevices); + INIT_EXPORTED_FUNC(clEnqueueBarrierWithWaitList); + INIT_EXPORTED_FUNC(clEnqueueFillBuffer); + INIT_EXPORTED_FUNC(clEnqueueFillImage); + INIT_EXPORTED_FUNC(clEnqueueMarkerWithWaitList); + INIT_EXPORTED_FUNC(clEnqueueMigrateMemObjects); + INIT_EXPORTED_FUNC(clGetKernelArgInfo); + INIT_EXPORTED_FUNC(clLinkProgram); + INIT_EXPORTED_FUNC(clReleaseDevice); + INIT_EXPORTED_FUNC(clRetainDevice); + INIT_EXPORTED_FUNC(clUnloadPlatformCompiler); + + // OpenCL 2.0 Entry Points (optional) + INIT_EXPORTED_FUNC(clSVMAlloc); + INIT_EXPORTED_FUNC(clSVMFree); + INIT_EXPORTED_FUNC(clEnqueueSVMFree); + INIT_EXPORTED_FUNC(clEnqueueSVMMemcpy); + INIT_EXPORTED_FUNC(clEnqueueSVMMemFill); + INIT_EXPORTED_FUNC(clEnqueueSVMMap); + INIT_EXPORTED_FUNC(clEnqueueSVMUnmap); + INIT_EXPORTED_FUNC(clSetKernelArgSVMPointer); + INIT_EXPORTED_FUNC(clSetKernelExecInfo); + INIT_EXPORTED_FUNC(clCreatePipe); + INIT_EXPORTED_FUNC(clGetPipeInfo); + INIT_EXPORTED_FUNC(clCreateCommandQueueWithProperties); + INIT_EXPORTED_FUNC(clCreateSamplerWithProperties); + + // OpenCL 2.1 Entry Points (optional) + INIT_EXPORTED_FUNC(clSetDefaultDeviceCommandQueue); + INIT_EXPORTED_FUNC(clGetDeviceAndHostTimer); + INIT_EXPORTED_FUNC(clGetHostTimer); 
+ INIT_EXPORTED_FUNC(clCreateProgramWithIL); + INIT_EXPORTED_FUNC(clCloneKernel); + INIT_EXPORTED_FUNC(clGetKernelSubGroupInfo); + INIT_EXPORTED_FUNC(clEnqueueSVMMigrateMem); + + // OpenCL 2.2 Entry Points (optional) + INIT_EXPORTED_FUNC(clSetProgramReleaseCallback); + INIT_EXPORTED_FUNC(clSetProgramSpecializationConstant); + + // CL-GL Entry Points (optional) + INIT_EXPORTED_FUNC(clCreateFromGLBuffer); + INIT_EXPORTED_FUNC(clCreateFromGLTexture); + INIT_EXPORTED_FUNC(clCreateFromGLTexture2D); + INIT_EXPORTED_FUNC(clCreateFromGLTexture3D); + INIT_EXPORTED_FUNC(clCreateFromGLRenderbuffer); + INIT_EXPORTED_FUNC(clGetGLObjectInfo); + INIT_EXPORTED_FUNC(clGetGLTextureInfo ); + INIT_EXPORTED_FUNC(clEnqueueAcquireGLObjects); + INIT_EXPORTED_FUNC(clEnqueueReleaseGLObjects); + + // Extensions (optional) + // Extensions get loaded into the dispatch table on the fly. + + success = savedSuccess; + } + + if( !success ) + { + if( m_OpenCLLibraryHandle != NULL ) + { + OS().UnloadLibrary( m_OpenCLLibraryHandle ); + } + } + + return success; +} +/////////////////////////////////////////////////////////////////////////////// +// +#elif defined(__APPLE__) +#define INIT_CL_FUNC(funcname) \ +{ \ + m_Dispatch . 
funcname = funcname; \ +}
+bool CLIntercept::initDispatch( void ) // Apple variant: INIT_CL_FUNC copies each named OpenCL function straight into m_Dispatch; nothing is looked up at run time and the function always returns true.
+{
+    INIT_CL_FUNC(clGetPlatformIDs);
+    INIT_CL_FUNC(clGetPlatformInfo);
+    INIT_CL_FUNC(clGetDeviceIDs);
+    INIT_CL_FUNC(clGetDeviceInfo);
+    INIT_CL_FUNC(clCreateContext);
+    INIT_CL_FUNC(clCreateContextFromType);
+    INIT_CL_FUNC(clRetainContext);
+    INIT_CL_FUNC(clReleaseContext);
+    INIT_CL_FUNC(clGetContextInfo);
+    INIT_CL_FUNC(clCreateCommandQueue);
+    INIT_CL_FUNC(clRetainCommandQueue);
+    INIT_CL_FUNC(clReleaseCommandQueue);
+    INIT_CL_FUNC(clGetCommandQueueInfo);
+    INIT_CL_FUNC(clSetCommandQueueProperty);
+    INIT_CL_FUNC(clCreateBuffer);
+    INIT_CL_FUNC(clCreateImage2D);
+    INIT_CL_FUNC(clCreateImage3D);
+    INIT_CL_FUNC(clRetainMemObject);
+    INIT_CL_FUNC(clReleaseMemObject);
+    INIT_CL_FUNC(clGetSupportedImageFormats);
+    INIT_CL_FUNC(clGetMemObjectInfo);
+    INIT_CL_FUNC(clGetImageInfo);
+    INIT_CL_FUNC(clCreateSampler);
+    INIT_CL_FUNC(clRetainSampler);
+    INIT_CL_FUNC(clReleaseSampler);
+    INIT_CL_FUNC(clGetSamplerInfo);
+    INIT_CL_FUNC(clCreateProgramWithSource);
+    INIT_CL_FUNC(clCreateProgramWithBinary);
+    INIT_CL_FUNC(clRetainProgram);
+    INIT_CL_FUNC(clReleaseProgram);
+    INIT_CL_FUNC(clBuildProgram);
+    INIT_CL_FUNC(clUnloadCompiler);
+    INIT_CL_FUNC(clGetProgramInfo);
+    INIT_CL_FUNC(clGetProgramBuildInfo);
+    INIT_CL_FUNC(clCreateKernel);
+    INIT_CL_FUNC(clCreateKernelsInProgram);
+    INIT_CL_FUNC(clRetainKernel);
+    INIT_CL_FUNC(clReleaseKernel);
+    INIT_CL_FUNC(clSetKernelArg);
+    INIT_CL_FUNC(clGetKernelInfo);
+    INIT_CL_FUNC(clGetKernelWorkGroupInfo);
+    INIT_CL_FUNC(clWaitForEvents);
+    INIT_CL_FUNC(clGetEventInfo);
+    INIT_CL_FUNC(clRetainEvent);
+    INIT_CL_FUNC(clReleaseEvent);
+    INIT_CL_FUNC(clGetEventProfilingInfo);
+    INIT_CL_FUNC(clFlush);
+    INIT_CL_FUNC(clFinish);
+    INIT_CL_FUNC(clEnqueueReadBuffer);
+    INIT_CL_FUNC(clEnqueueWriteBuffer);
+    INIT_CL_FUNC(clEnqueueCopyBuffer);
+    INIT_CL_FUNC(clEnqueueReadImage);
+    INIT_CL_FUNC(clEnqueueWriteImage);
+    INIT_CL_FUNC(clEnqueueCopyImage);
+    INIT_CL_FUNC(clEnqueueCopyImageToBuffer);
+    INIT_CL_FUNC(clEnqueueCopyBufferToImage);
+    INIT_CL_FUNC(clEnqueueMapBuffer);
+    INIT_CL_FUNC(clEnqueueMapImage);
+    INIT_CL_FUNC(clEnqueueUnmapMemObject);
+    INIT_CL_FUNC(clEnqueueNDRangeKernel);
+    INIT_CL_FUNC(clEnqueueTask);
+    INIT_CL_FUNC(clEnqueueNativeKernel);
+    INIT_CL_FUNC(clEnqueueMarker);
+    INIT_CL_FUNC(clEnqueueWaitForEvents);
+    INIT_CL_FUNC(clEnqueueBarrier);
+
+    // Extension lookup entry points.  The "(optional)" labels below mirror the dynamically loaded variant of this function; here every entry point is assigned unconditionally.
+    INIT_CL_FUNC(clGetExtensionFunctionAddress);
+    INIT_CL_FUNC(clGetExtensionFunctionAddressForPlatform);
+
+    // OpenCL 1.1 Entry Points (optional)
+    INIT_CL_FUNC(clCreateSubBuffer);
+    INIT_CL_FUNC(clSetMemObjectDestructorCallback);
+    INIT_CL_FUNC(clCreateUserEvent);
+    INIT_CL_FUNC(clSetUserEventStatus);
+    INIT_CL_FUNC(clSetEventCallback);
+    INIT_CL_FUNC(clEnqueueReadBufferRect);
+    INIT_CL_FUNC(clEnqueueWriteBufferRect);
+    INIT_CL_FUNC(clEnqueueCopyBufferRect);
+
+    // OpenCL 1.2 Entry Points (optional)
+    INIT_CL_FUNC(clCompileProgram);
+    INIT_CL_FUNC(clCreateFromGLTexture);
+    INIT_CL_FUNC(clCreateImage);
+    INIT_CL_FUNC(clCreateProgramWithBuiltInKernels);
+    INIT_CL_FUNC(clCreateSubDevices);
+    INIT_CL_FUNC(clEnqueueBarrierWithWaitList);
+    INIT_CL_FUNC(clEnqueueFillBuffer);
+    INIT_CL_FUNC(clEnqueueFillImage);
+    INIT_CL_FUNC(clEnqueueMarkerWithWaitList);
+    INIT_CL_FUNC(clEnqueueMigrateMemObjects);
+    INIT_CL_FUNC(clGetKernelArgInfo);
+    INIT_CL_FUNC(clLinkProgram);
+    INIT_CL_FUNC(clReleaseDevice);
+    INIT_CL_FUNC(clRetainDevice);
+    INIT_CL_FUNC(clUnloadPlatformCompiler);
+
+    // CL-GL Entry Points (optional)
+    INIT_CL_FUNC(clCreateFromGLBuffer);
+    INIT_CL_FUNC(clCreateFromGLTexture); // OpenCL 1.2; already assigned in the 1.2 group above, so this repeats the same assignment
+    INIT_CL_FUNC(clCreateFromGLTexture2D);
+    INIT_CL_FUNC(clCreateFromGLTexture3D);
+    INIT_CL_FUNC(clCreateFromGLRenderbuffer);
+    INIT_CL_FUNC(clGetGLObjectInfo);
+    INIT_CL_FUNC(clGetGLTextureInfo);
+    INIT_CL_FUNC(clEnqueueAcquireGLObjects);
+    INIT_CL_FUNC(clEnqueueReleaseGLObjects);
+
+    // Extensions (optional)
+    // Extensions get loaded into the dispatch table on the fly.
+
+    return true;
+}
+#else +#error Unknown OS! +#endif + +/////////////////////////////////////////////////////////////////////////////// +// +#if defined(USE_ITT) +void CLIntercept::ittInit() +{ + if( m_ITTInitialized == false ) + { + m_OS.EnterCriticalSection(); + + if( m_ITTInitialized == false ) + { + log( "Initializing ITT...\n" ); + + m_ITTInitialized = true; + + m_ITTDomain = __itt_domain_create( "com.intel.clintercept" ); + if( m_ITTDomain == NULL ) + { + log( "__itt_domain_create() returned NULL!\n" ); + } + + //m_ITTQueuedState = __ittx_task_state_create( m_ITTDomain, "QUEUED" ); + //m_ITTSubmittedState = __ittx_task_state_create( m_ITTDomain, "SUBMITTED" ); + //m_ITTExecutingState = __ittx_task_state_create( m_ITTDomain, "EXECUTING" ); + + //m_ITTQueueTrackGroup = __itt_track_group_create( + // __itt_string_handle_create("Queue tracks"), + // __itt_track_group_type_normal ); + //if( m_ITTQueueTrackGroup == NULL ) + //{ + // log( "__itt_track_group_create() returned NULL!\n" ); + //} + + log( "...
done!\n" ); + } + + m_OS.LeaveCriticalSection(); + } +} + +void CLIntercept::ittCallLoggingEnter( + const std::string& functionName, + const cl_kernel kernel ) +{ + std::string str( functionName ); + if( kernel ) + { + m_OS.EnterCriticalSection(); + + const std::string& kernelName = m_KernelNameMap[ kernel ]; + str += "( "; + str += kernelName; + str += " )"; + + m_OS.LeaveCriticalSection(); + } + + __itt_string_handle* itt_string_handle = __itt_string_handle_create( str.c_str() ); + __itt_task_begin(m_ITTDomain, __itt_null, __itt_null, itt_string_handle); +} + +void CLIntercept::ittCallLoggingExit() +{ + __itt_task_end(m_ITTDomain); +} + +void CLIntercept::ittRegisterCommandQueue( + cl_command_queue queue, + bool supportsPerfCounters ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + cl_device_id device = NULL; + cl_device_type deviceType = 0; + cl_command_queue_properties properties = 0; + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + queue, + CL_QUEUE_DEVICE, + sizeof(device), + &device, + NULL); + } + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetDeviceInfo( + device, + CL_DEVICE_TYPE, + sizeof(deviceType), + &deviceType, + NULL ); + } + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + queue, + CL_QUEUE_PROPERTIES, + sizeof(properties), + &properties, + NULL ); + } + + SITTQueueInfo* pITTQueueInfo = NULL; + if( errorCode == CL_SUCCESS ) + { + pITTQueueInfo = new SITTQueueInfo; + if( pITTQueueInfo == NULL ) + { + errorCode = CL_OUT_OF_HOST_MEMORY; + } + else + { + pITTQueueInfo->pIntercept = this; + pITTQueueInfo->SupportsPerfCounters = supportsPerfCounters; + + pITTQueueInfo->itt_track = NULL; + pITTQueueInfo->itt_clock_domain = NULL; + pITTQueueInfo->CPUReferenceTime = 0; + pITTQueueInfo->CLReferenceTime = 0; + } + } + + if( errorCode == CL_SUCCESS ) + { + std::string trackName = "OpenCL"; + + if( properties & 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) + { + trackName += " Out-Of-Order"; + } + else + { + trackName += " In-Order"; + } + + if( deviceType & CL_DEVICE_TYPE_CPU ) + { + trackName += " CPU"; + } + if( deviceType & CL_DEVICE_TYPE_GPU ) + { + trackName += " GPU"; + } + if( deviceType & CL_DEVICE_TYPE_ACCELERATOR ) + { + trackName += " ACCELERATOR"; + } + if( deviceType & CL_DEVICE_TYPE_CUSTOM ) + { + trackName += " CUSTOM"; + } + + trackName += " Queue, "; + + { + char str[CLI_MAX_STRING_SIZE] = ""; + CLI_SPRINTF( str, CLI_MAX_STRING_SIZE, "Handle = %p", queue ); + trackName = trackName + str; + } + + // Don't fail if the track cannot be created, it just means we + // won't be as detailed in our tracking. + //__itt_track* track = __itt_track_create( + // m_ITTQueueTrackGroup, + // __itt_string_handle_create(trackName.c_str()), + // __itt_track_type_queue ); + //if( track != NULL ) + //{ + // pITTQueueInfo->itt_track = track; + // + // __itt_set_track(track); + // + // __ittx_set_default_state( + // m_ITTDomain, + // m_ITTQueuedState ); + // + // __itt_set_track(NULL); + //} + + dispatch().clRetainCommandQueue( queue ); + + m_ITTQueueInfoMap[ queue ] = pITTQueueInfo; + } + + if( errorCode != CL_SUCCESS ) + { + delete pITTQueueInfo; + pITTQueueInfo = NULL; + } + + m_OS.LeaveCriticalSection(); +} + +void CLIntercept::ittReleaseCommandQueue( + cl_command_queue queue ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + cl_uint refCount = 0; + + SITTQueueInfo* pITTQueueInfo = m_ITTQueueInfoMap[ queue ]; + if( pITTQueueInfo ) + { + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + queue, + CL_QUEUE_REFERENCE_COUNT, + sizeof( refCount ), + &refCount, + NULL ); + } + + if( ( errorCode == CL_SUCCESS ) && + ( refCount == 1 ) ) + { + dispatch().clReleaseCommandQueue( queue ); + + // I guess we don't delete a track after we've created it? + // Or a clock domain? 
+ + delete pITTQueueInfo; + pITTQueueInfo = NULL; + + m_ITTQueueInfoMap.erase( queue ); + } + } + + m_OS.LeaveCriticalSection(); +} + +void ITTAPI CLIntercept::ittClockInfoCallback( + __itt_clock_info* pClockInfo, + void* pData ) +{ + const SITTQueueInfo* pQueueInfo = (const SITTQueueInfo*)pData; + + uint64_t cpuTickDelta = + pQueueInfo->pIntercept->OS().GetTimer() - + pQueueInfo->CPUReferenceTime; + + uint64_t cpuDeltaNS = pQueueInfo->pIntercept->OS().TickToNS( cpuTickDelta ); + + pClockInfo->clock_base = pQueueInfo->CLReferenceTime + cpuDeltaNS; + pClockInfo->clock_freq = 1000000000; // NS +} + +void CLIntercept::ittTraceEvent( + const std::string& name, + cl_event event, + uint64_t queuedTime ) +{ + cl_int errorCode = CL_SUCCESS; + + cl_command_queue queue = NULL; + cl_command_type type = 0; + + cl_ulong commandQueued = 0; + cl_ulong commandSubmit = 0; + cl_ulong commandStart = 0; + cl_ulong commandEnd = 0; + + errorCode |= dispatch().clGetEventInfo( + event, + CL_EVENT_COMMAND_QUEUE, + sizeof( queue ), + &queue, + NULL ); + + errorCode |= dispatch().clGetEventInfo( + event, + CL_EVENT_COMMAND_TYPE, + sizeof(type), + &type, + NULL ); + + errorCode |= dispatch().clGetEventProfilingInfo( + event, + CL_PROFILING_COMMAND_QUEUED, + sizeof( commandQueued ), + &commandQueued, + NULL ); + errorCode |= dispatch().clGetEventProfilingInfo( + event, + CL_PROFILING_COMMAND_SUBMIT, + sizeof( commandSubmit ), + &commandSubmit, + NULL ); + errorCode |= dispatch().clGetEventProfilingInfo( + event, + CL_PROFILING_COMMAND_START, + sizeof( commandStart ), + &commandStart, + NULL ); + errorCode |= dispatch().clGetEventProfilingInfo( + event, + CL_PROFILING_COMMAND_END, + sizeof( commandEnd ), + &commandEnd, + NULL ); + + if( errorCode == CL_SUCCESS ) + { + // It's possible we don't have any ITT info for this queue. 
+ SITTQueueInfo* pITTQueueInfo = m_ITTQueueInfoMap[ queue ]; + if( pITTQueueInfo != NULL ) + { + __itt_clock_domain* clockDomain = pITTQueueInfo->itt_clock_domain; + if( clockDomain == NULL ) + { + pITTQueueInfo->CPUReferenceTime = queuedTime; + pITTQueueInfo->CLReferenceTime = commandQueued; + + clockDomain = __itt_clock_domain_create( + ittClockInfoCallback, + pITTQueueInfo ); + if( clockDomain == NULL ) + { + log( "__itt_clock_domain_create() returned NULL!\n"); + } + + pITTQueueInfo->itt_clock_domain = clockDomain; + } + + __itt_track* track = pITTQueueInfo->itt_track; + uint64_t clockOffset = 0; + + if( commandQueued == 0 ) + { + clockOffset = queuedTime; + clockOffset -= pITTQueueInfo->CPUReferenceTime; + clockOffset = OS().TickToNS( clockOffset ); + } + + commandQueued += clockOffset; + commandSubmit += clockOffset; + commandStart += clockOffset; + commandEnd += clockOffset; + + __itt_set_track( track ); + + __itt_string_handle* nameHandle = __itt_string_handle_create( name.c_str() ); + __itt_id eventId = __itt_id_make( NULL, (uint64_t)event ); + + __itt_id_create_ex( m_ITTDomain, clockDomain, commandQueued, eventId ); + + if( config().ITTShowOnlyExecutingEvents ) + { + __itt_task_begin_overlapped_ex( m_ITTDomain, clockDomain, commandStart, eventId, __itt_null, nameHandle ); + //__ittx_task_set_state( m_ITTDomain, clockDomain, commandStart, eventId, m_ITTExecutingState ); + __itt_task_end_overlapped_ex( m_ITTDomain, clockDomain, commandEnd, eventId ); + } + else + { + __itt_task_begin_overlapped_ex( m_ITTDomain, clockDomain, commandQueued, eventId, __itt_null, nameHandle ); + //__ittx_task_set_state( m_ITTDomain, clockDomain, commandSubmit, eventId, m_ITTSubmittedState); + //__ittx_task_set_state( m_ITTDomain, clockDomain, commandStart, eventId, m_ITTExecutingState ); + __itt_task_end_overlapped_ex( m_ITTDomain, clockDomain, commandEnd, eventId ); + } + + if( pITTQueueInfo->SupportsPerfCounters ) + { + // TODO: This needs to be updated to use MDAPI. 
+ CLI_ASSERT( 0 ); + } + + __itt_id_destroy_ex( m_ITTDomain, clockDomain, commandEnd, eventId ); + + __itt_set_track(NULL); + } + else + { + log( "ittTraceEvent(): no queue info\n" ); + } + } + else + { + log( "ittTraceEvent(): OpenCL error\n" ); + } +} + +#endif + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::chromeCallLoggingExit( + const std::string& functionName, + const cl_kernel kernel, + uint64_t tickStart, + uint64_t tickEnd ) +{ + std::string str; + str += functionName; + + if( kernel ) + { + const std::string& kernelName = m_KernelNameMap[ kernel ]; + str += "( "; + str += kernelName; + str += " )"; + } + + uint64_t processId = + OS().GetProcessID(); + uint64_t threadId = + OS().GetThreadID(); + + uint64_t usStart = + OS().TickToNS( tickStart - m_StartTime ) / 1000; + uint64_t usDelta = + OS().TickToNS( tickEnd - tickStart ) / 1000; + + m_InterceptTrace + << "{\"ph\":\"X\", \"pid\":" << processId + << ", \"tid\":" << threadId + << ", \"name\":\"" << str + << "\", \"ts\":" << usStart + << ", \"dur\":" << usDelta + << "},\n"; +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::chromeRegisterCommandQueue( + cl_command_queue queue ) +{ + m_OS.EnterCriticalSection(); + + cl_int errorCode = CL_SUCCESS; + + cl_device_id device = NULL; + cl_device_type deviceType = 0; + cl_command_queue_properties properties = 0; + + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + queue, + CL_QUEUE_DEVICE, + sizeof(device), + &device, + NULL); + } + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetDeviceInfo( + device, + CL_DEVICE_TYPE, + sizeof(deviceType), + &deviceType, + NULL ); + } + if( errorCode == CL_SUCCESS ) + { + errorCode = dispatch().clGetCommandQueueInfo( + queue, + CL_QUEUE_PROPERTIES, + sizeof(properties), + &properties, + NULL ); + } + + if( errorCode == CL_SUCCESS ) + { + std::string trackName = 
"OpenCL"; + + if( properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) + { + trackName += " Out-Of-Order"; + } + else + { + trackName += " In-Order"; + } + + if( deviceType & CL_DEVICE_TYPE_CPU ) + { + trackName += " CPU"; + } + if( deviceType & CL_DEVICE_TYPE_GPU ) + { + trackName += " GPU"; + } + if( deviceType & CL_DEVICE_TYPE_ACCELERATOR ) + { + trackName += " ACCELERATOR"; + } + if( deviceType & CL_DEVICE_TYPE_CUSTOM ) + { + trackName += " CUSTOM"; + } + + trackName += " Queue"; + + //{ + // char str[CLI_MAX_STRING_SIZE] = ""; + // CLI_SPRINTF( str, CLI_MAX_STRING_SIZE, ", Handle = %p", queue ); + // trackName = trackName + str; + //} + + uint64_t processId = OS().GetProcessID(); + m_InterceptTrace + << "{\"ph\":\"M\", \"name\":\"thread_name\", \"pid\":" << processId + << ", \"tid\":-" << (uintptr_t)queue + << ", \"args\":{\"name\":\"" << trackName + << "\"}},\n"; + } + + m_OS.LeaveCriticalSection(); +} + +/////////////////////////////////////////////////////////////////////////////// +// +void CLIntercept::chromeTraceEvent( + const std::string& name, + cl_event event, + uint64_t queuedTime ) +{ + cl_int errorCode = CL_SUCCESS; + + cl_command_queue queue = NULL; + cl_command_type type = 0; + + cl_ulong commandQueued = 0; + cl_ulong commandSubmit = 0; + cl_ulong commandStart = 0; + cl_ulong commandEnd = 0; + + errorCode |= dispatch().clGetEventInfo( + event, + CL_EVENT_COMMAND_QUEUE, + sizeof( queue ), + &queue, + NULL ); + + errorCode |= dispatch().clGetEventInfo( + event, + CL_EVENT_COMMAND_TYPE, + sizeof(type), + &type, + NULL ); + + errorCode |= dispatch().clGetEventProfilingInfo( + event, + CL_PROFILING_COMMAND_QUEUED, + sizeof( commandQueued ), + &commandQueued, + NULL ); + errorCode |= dispatch().clGetEventProfilingInfo( + event, + CL_PROFILING_COMMAND_SUBMIT, + sizeof( commandSubmit ), + &commandSubmit, + NULL ); + errorCode |= dispatch().clGetEventProfilingInfo( + event, + CL_PROFILING_COMMAND_START, + sizeof( commandStart ), + &commandStart, + NULL 
); + errorCode |= dispatch().clGetEventProfilingInfo( + event, + CL_PROFILING_COMMAND_END, + sizeof( commandEnd ), + &commandEnd, + NULL ); + + if( errorCode == CL_SUCCESS ) + { + uint64_t normalizedQueuedTimeNS = + OS().TickToNS( queuedTime - m_StartTime ); + uint64_t normalizedStartTimeNS = + ( commandStart - commandQueued ) + normalizedQueuedTimeNS; + + uint64_t usStart = normalizedStartTimeNS / 1000; + uint64_t usDelta = ( commandEnd - commandStart ) / 1000; + + uint64_t processId = OS().GetProcessID(); + m_InterceptTrace + << "{\"ph\":\"X\", \"pid\":" << processId + << ", \"tid\":-" << (uintptr_t)queue + << ", \"name\":\"" << name + << "\", \"ts\":" << usStart + << ", \"dur\":" << usDelta + << "},\n"; + + } + else + { + log( "chromeTraceEvent(): OpenCL error\n" ); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +bool CLIntercept::checkAubCaptureKernelSignature( + const cl_kernel kernel, + cl_uint workDim, + const size_t* gws, + const size_t* lws ) +{ + m_OS.EnterCriticalSection(); + + bool match = true; + + // If the aubcapture kernel name is set, make sure it matches the name + // of the passed-in kernel: + + if( match && + m_Config.AubCaptureKernelName != "" && + m_KernelNameMap[ kernel ] != m_Config.AubCaptureKernelName ) + { + //logf( "Skipping aub capture: kernel name '%s' doesn't match the requested kernel name '%s'.\n", + // m_KernelNameMap[ kernel ].c_str(), + // m_Config.AubCaptureKernelName.c_str() ); + match = false; + } + + // If the aubcapture global work size is set, and it is not set to the + // wildcard ("*"), make sure it matches the passed-in global work size: + + if( match && + m_Config.AubCaptureKernelGWS != "" && + m_Config.AubCaptureKernelGWS != "*" ) + { + std::ostringstream ss; + if( gws ) + { + if( workDim >= 1 ) + { + ss << gws[0]; + } + if( workDim >= 2 ) + { + ss << "x" << gws[1]; + } + if( workDim >= 3 ) + { + ss << "x" << gws[2]; + } + } + else + { + ss << "NULL"; + } + if( 
m_Config.AubCaptureKernelGWS != ss.str() ) + { + //logf( "Skipping aub capture: global work size %s doesn't match the requested global work size %s.\n", + // ss.str(), + // m_Config.AubCaptureKernelGWS.c_str() ); + match = false; + } + } + + // If the aubcapture local work size is set, and it is not set to the + // wildcard ("*"), make sure it matches the passed-in local work size: + + if( match && + m_Config.AubCaptureKernelLWS != "" && + m_Config.AubCaptureKernelLWS != "*" ) + { + std::ostringstream ss; + if( lws ) + { + if( workDim >= 1 ) + { + ss << lws[0]; + } + if( workDim >= 2 ) + { + ss << "x" << lws[1]; + } + if( workDim >= 3 ) + { + ss << "x" << lws[2]; + } + } + else + { + ss << "NULL"; + } + if( m_Config.AubCaptureKernelLWS != ss.str() ) + { + //logf( "Skipping aub capture: local work size %s doesn't match the requested local work size %s.\n", + // ss.str(), + // m_Config.AubCaptureKernelLWS.c_str() ); + match = false; + } + } + + if( match && + m_Config.AubCaptureUniqueKernels ) + { + std::string key = m_KernelNameMap[ kernel ]; + + { + cl_program program = NULL; + dispatch().clGetKernelInfo( + kernel, + CL_KERNEL_PROGRAM, + sizeof(program), + &program, + NULL ); + if( program ) + { + unsigned int programNumber = m_ProgramNumberMap[ program ]; + uint64_t programHash = m_ProgramHashMap[ program ]; + unsigned int compileCount = m_ProgramNumberCompileCountMap[ programNumber ]; + + char hashString[256] = ""; + if( config().OmitProgramNumber ) + { + CLI_SPRINTF( hashString, 256, "(%08X_%04u)", + (unsigned int)programHash, + compileCount ); + } + else + { + CLI_SPRINTF( hashString, 256, "(%04u_%08X_%04u)", + programNumber, + (unsigned int)programHash, + compileCount ); + } + key += hashString; + } + } + + if( gws ) + { + std::ostringstream ss; + ss << " GWS[ "; + if( gws ) + { + if( workDim >= 1 ) + { + ss << gws[0]; + } + if( workDim >= 2 ) + { + ss << "x" << gws[1]; + } + if( workDim >= 3 ) + { + ss << "x" << gws[2]; + } + } + else + { + ss << "NULL"; + } 
+ ss << " ]"; + key += ss.str(); + } + + { + std::ostringstream ss; + ss << " LWS[ "; + if( lws ) + { + if( workDim >= 1 ) + { + ss << lws[0]; + } + if( workDim >= 2 ) + { + ss << "x" << lws[1]; + } + if( workDim >= 3 ) + { + ss << "x" << lws[2]; + } + } + else + { + ss << "NULL"; + } + ss << " ]"; + key += ss.str(); + } + + if( m_AubCaptureSet.find( key ) == m_AubCaptureSet.end() ) + { + m_AubCaptureSet.insert( key ); + } + else + { + //logf( "Skipping aub capture: key %s was already captured.\n", + // key.c_str() ); + match = false; + } + } + + m_OS.LeaveCriticalSection(); + + return match; +} diff --git a/Src/intercept.h b/Src/intercept.h new file mode 100644 index 00000000..334054a6 --- /dev/null +++ b/Src/intercept.h @@ -0,0 +1,2109 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "common.h" +#include "enummap.h" +#include "dispatch.h" + +#include "instrumentation.h" + +#if defined(USE_MDAPI) +#include "MetricsDiscoveryHelper.h" +#endif + +#if defined(_WIN32) + +#elif defined(__linux__) || defined(__APPLE__) + +#include +#define strcpy_s( _dst, _size, _src ) strncpy( _dst, _src, _size ) + +#else +#error Unknown OS! +#endif + +#include "OS/OS.h" + +class CLIntercept +{ + struct Config; + +public: + static bool Create( void* pGlobalData, CLIntercept*& pIntercept ); + static void Delete( CLIntercept*& pIntercept ); + + void report(); + + void callLoggingEnter( + const std::string& functionName, + const cl_kernel kernel ); + void callLoggingEnter( + const std::string& functionName, + const cl_kernel kernel, + const char* formatStr, + ... ); + + void callLoggingInfo( + const std::string& str ); + void callLoggingInfo( + const char* formatStr, + ... ); + + void callLoggingExit( + const std::string& functionName, + const cl_kernel kernel, + const cl_event* event ); + void callLoggingExit( + const std::string& functionName, + const cl_kernel kernel, + const cl_event* event, + const char* formatStr, + ... 
); + + cl_int allocateAndGetPlatformInfoString( + cl_platform_id platform, + cl_platform_info param_name, + char*& param_value ) const; + cl_int allocateAndGetDeviceInfoString( + cl_device_id device, + cl_device_info param_name, + char*& param_value ) const; + + void getPlatformInfoString( + cl_platform_id platform, + std::string& str ) const; + void getDeviceInfoString( + cl_uint num_devices, + const cl_device_id* devices, + std::string& str ) const; + void getEventListString( + cl_uint num_events, + const cl_event* event_list, + std::string& str ) const; + void getContextPropertiesString( + const cl_context_properties* properties, + std::string& str ) const; + void getSamplerPropertiesString( + const cl_sampler_properties* properties, + std::string& str ) const; + void getCommandQueuePropertiesString( + const cl_queue_properties* properties, + std::string& str ) const; + void getCreateKernelsInProgramRetString( + cl_int retVal, + cl_kernel* kernels, + cl_uint* num_kernels_ret, + std::string& str ) const; + void getKernelArgString( + cl_uint arg_index, + size_t arg_size, + const void* arg_value, + std::string& str ) const; + void getEnqueueNDRangeKernelArgsString( + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + std::string& str ) const; + void getCreateSubBufferArgsString( + cl_buffer_create_type createType, + const void *createInfo, + std::string& str ) const; + + void logCLInfo(); + void logBuild( + uint64_t buildTimeStart, + const cl_program program, + cl_uint num_devices, + const cl_device_id* device_list ); + void logError( + const std::string& functionName, + cl_int errorCode ); + void logFlushOrFinishAfterEnqueueStart( + const std::string& flushOrFinish, + const std::string& functionName ); + void logFlushOrFinishAfterEnqueueEnd( + const std::string& flushOrFinish, + const std::string& functionName, + cl_int errorCode ); + void logPreferredWorkGroupSizeMultiple( + const cl_kernel* 
kernels, + cl_uint numKernels ); + + void logCL_GLTextureDetails( cl_mem image, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture ); + + struct SContextCallbackInfo + { + CLIntercept* pIntercept; + void (CL_CALLBACK* pApplicationCallback)( const char*, const void*, size_t, void* ); + void* pUserData; + }; + static void CL_CALLBACK contextCallbackCaller( + const char*, + const void*, + size_t, + void* ); + void contextCallback( + const std::string& errinfo, + const void* private_info, + size_t cb ); + void contextCallbackOverrideInit( + const cl_context_properties* properties, + void (CL_CALLBACK*& pCallback)( const char*, const void*, size_t, void* ), + void*& pUserData, + SContextCallbackInfo*& pContextCallbackInfo, + cl_context_properties*& pLocalContextProperties ); + void contextCallbackOverrideCleanup( + const cl_context context, + SContextCallbackInfo*& pContextCallbackInfo, + cl_context_properties*& pLocalContextProperties ); + + struct SEventCallbackInfo + { + CLIntercept* pIntercept; + void (CL_CALLBACK* pApplicationCallback)( cl_event, cl_int, void* ); + void* pUserData; + }; + static void CL_CALLBACK eventCallbackCaller( + cl_event, + cl_int, + void* ); + void eventCallback( + cl_event event, + cl_int status ); + + void incrementEnqueueCounter(); + uint64_t getEnqueueCounter(); + + void overrideNullLocalWorkSize( + const cl_uint work_dim, + const size_t* global_work_size, + const size_t*& local_work_size ); + + void combineProgramStrings( + cl_uint& count, + const char**& strings, + const size_t*& lengths, + char*& singleString ) const; + + void incrementProgramCompileCount( + const cl_program program ); + uint64_t hashString( + const char* singleString, + size_t length ); + void saveProgramHash( + const cl_program program, + uint64_t hash ); + + bool injectProgramSource( + const uint64_t hash, + cl_uint& count, + const char**& strings, + const size_t*& lengths, + char*& singleString ); + bool prependProgramSource( + const uint64_t hash, + cl_uint& 
count, + const char**& strings, + const size_t*& lengths, + char*& singleString ); + bool injectProgramSPIRV( + const uint64_t hash, + size_t& length, + const void*& il, + char*& injectedIL ); + bool injectProgramOptions( + const cl_program program, + const char*& options, + char*& newOptions ); + bool appendBuildOptions( + const char*& options, + char*& newOptions ); + void dumpProgramSourceScript( + const cl_program program, + const char* singleString ); + void dumpProgramSource( + uint64_t hash, + const cl_program program, + const char* singleString ); + void dumpInputProgramBinaries( + uint64_t hash, + const cl_program program, + cl_uint num_devices, + const cl_device_id* device_list, + const size_t* lengths, + const unsigned char** binaries ); + void dumpProgramSPIRV( + uint64_t hash, + const cl_program program, + const size_t length, + const void* il ); + void dumpProgramOptionsScript( + const cl_program program, + const char* options ); + void dumpProgramOptions( + const cl_program program, + const char* options ); + void dumpProgramBuildLog( + const cl_program program, + const cl_device_id device, + const char* buildLog, + const size_t buildLogSize ); + + cl_program createProgramWithInjectionBinaries( + uint64_t hash, + cl_context context, + cl_int* errcode_ret ); + void dumpProgramBinary( + const cl_program program ); + + cl_program createProgramWithInjectionSPIRV( + uint64_t hash, + cl_context context, + cl_int* errcode_ret ); + void autoCreateSPIRV( + const cl_program program, + const char* options ); + + void updateHostTimingStats( + const std::string& functionName , + const cl_kernel kernel, + uint64_t start, + uint64_t end ); + + void modifyCommandQueueProperties( + cl_command_queue_properties& props ) const; + void createCommandQueueOverrideInit( + const cl_queue_properties* properties, + cl_queue_properties*& pLocalQueueProperties ) const; + void createCommandQueueOverrideCleanup( + cl_queue_properties*& pLocalQueueProperties ) const; + void 
addTimingEvent( + const std::string& functionName, + const uint64_t queuedTime, + const cl_kernel kernel, + const cl_uint workDim, + const size_t* gws, + const size_t* lws, + cl_event event ); + void checkTimingEvents(); + + void addKernelName( + const cl_kernel kernel, + const std::string& kernelName ); + + void addKernelNames( + cl_kernel* kernels, + cl_uint numKernels ); + + void removeKernel( + const cl_kernel kernel ); + + void addBuffer( + cl_mem buffer ); + void addSampler( + cl_sampler sampler, + const std::string& str ); + void removeSampler( + cl_sampler sampler ); + bool getSampler( + size_t size, + const void *arg_value, + std::string& str ) const; + void addImage( + cl_mem image ); + void removeMemObj( + cl_mem memobj ); + void addSVMAllocation( + void* svmPtr, + size_t size ); + void removeSVMAllocation( + void* svmPtr ); + void setKernelArg( + cl_kernel kernel, + cl_uint arg_index, + cl_mem memobj ); + void setKernelArgSVMPointer( + cl_kernel kernel, + cl_uint arg_index, + const void* arg ); + void dumpBuffersForKernel( + const std::string& name, + cl_kernel kernel, + cl_command_queue command_queue ); + void dumpImagesForKernel( + const std::string& name, + cl_kernel kernel, + cl_command_queue command_queue ); + void dumpBuffer( + const std::string& name, + cl_mem memobj, + cl_command_queue command_queue, + void* ptr, + size_t offset, + size_t size ); + void dumpArgument( + cl_kernel kernel, + cl_int arg_index, + size_t size, + const void *pBuffer ); + + void startAubCapture( + const std::string& functionName, + const cl_kernel kernel, + const cl_uint workDim, + const size_t* gws, + const size_t* lws, + cl_command_queue commandQueue ); + void stopAubCapture( + cl_command_queue commandQueue ); + + void initPrecompiledKernelOverrides( + const cl_context context ); + void initBuiltinKernelOverrides( + const cl_context context ); + + cl_int writeStringToMemory( + size_t param_value_size, + const std::string& param, + size_t* param_value_size_ret, + char* 
pointer ) const; + template< class T > + cl_int writeParamToMemory( + size_t param_value_size, + T param, + size_t* param_value_size_ret, + T* pointer ) const; + + bool overrideGetPlatformInfo( + cl_platform_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret, + cl_int& errorCode ); + bool overrideGetDeviceInfo( + cl_device_id device, + cl_platform_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret, + cl_int& errorCode ); + + cl_int ReadBuffer( + cl_command_queue commandQueue, + cl_mem srcBuffer, + cl_bool blockingRead, + size_t srcOffset, + size_t bytesToRead, + void* dstPtr, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ); + cl_int WriteBuffer( + cl_command_queue commandQueue, + cl_mem dstBuffer, + cl_bool blockingWrite, + size_t dstOffset, + size_t bytesToWrite, + const void* srcPtr, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ); + cl_int CopyBuffer( + cl_command_queue commandQueue, + cl_mem srcBuffer, + cl_mem dstBuffer, + size_t srcOffset, + size_t dstOffset, + size_t bytesToCopy, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ); + cl_int CopyBufferHelper( + cl_context context, + cl_command_queue commandQueue, + cl_mem srcBuffer, + cl_mem dstBuffer, + size_t srcOffset, + size_t dstOffset, + size_t bytesToCopy, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ); + + cl_int ReadImage( + cl_command_queue commandQueue, + cl_mem srcImage, + cl_bool blockingRead, + const size_t* srcOrigin, + const size_t* region, + size_t dstRowPitch, + size_t dstSlicePitch, + void* dstPtr, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ); + cl_int WriteImage( + cl_command_queue commandQueue, + cl_mem dstImage, + cl_bool blockingWrite, + const size_t* dstOrigin, + const size_t* region, + size_t srcRowPitch, + size_t 
srcSlicePitch, + const void* srcPtr, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ); + cl_int CopyImage( + cl_command_queue commandQueue, + cl_mem srcImage, + cl_mem dstImage, + const size_t* srcOrigin, + const size_t* dstOrigin, + const size_t* region, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ); + cl_int CopyImageHelper( + cl_context context, + cl_command_queue commandQueue, + cl_mem srcImage, + cl_mem dstImage, + const size_t* srcOrigin, + const size_t* dstOrigin, + const size_t* region, + cl_uint numEventsInWaitList, + const cl_event* eventWaitList, + cl_event* event ); + + cl_program createProgramWithBuiltinKernels( + cl_context context ); + cl_kernel createBuiltinKernel( + cl_program program, + const std::string& kernel_name, + cl_int* errcode_ret ); + cl_int NDRangeBuiltinKernel( + cl_command_queue commandQueue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event ); + + void SIMDSurveyCreateProgramFromSource( + const cl_program program, + cl_context context, + cl_uint count, + const char** strings, + const size_t* lengths ); + // TODO? + // SIMDSurveyCreateProgramWithBinary + // SIMDSurveyCreateProgramWithIL + void SIMDSurveyBuildProgram( + const cl_program program, + cl_uint numDevices, + const cl_device_id* deviceList, + const char* options ); + void SIMDSurveyCreateKernel( + const cl_program program, + const cl_kernel kernel, + const std::string& kernelName ); + // TODO? 
+ // SIMDSurveyCreateKernelsInProgram(); + // SIMDSurveyCloneKernel(); + void SIMDSurveySetKernelArg( + cl_kernel kernel, + cl_uint argIndex, + size_t argSize, + const void* argValue ); + void SIMDSurveyNDRangeKernel( + cl_kernel& kernel ); + + void* getExtensionFunctionAddress( + cl_platform_id platform, + const std::string& func_name ) const; + +#if defined(USE_MDAPI) + GTDI_CONFIGURATION_SET initCustomPerfCounters( + const std::string& setName ); + cl_command_queue createMDAPICommandQueue( + cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int* errcode_ret ); + cl_command_queue createMDAPICommandQueue( + cl_context context, + cl_device_id device, + const cl_queue_properties* properties, + cl_int* errcode_ret ); +#endif + + const OS::Services& OS() const; + const CLdispatch& dispatch() const; + const CEnumNameMap& enumName() const; + + const Config& config() const; + + bool callLogging() const; + + bool nullEnqueue() const; + + bool dumpBufferForKernel( const cl_kernel kernel ); + bool dumpImagesForKernel( const cl_kernel kernel ); + bool checkDumpBufferEnqueueLimits() const; + bool checkDumpImageEnqueueLimits() const; + + bool checkAubCaptureEnqueueLimits() const; + bool checkAubCaptureKernelSignature( + const cl_kernel kernel, + cl_uint workDim, + const size_t* gws, + const size_t* lws); + + void saveProgramNumber( const cl_program program ); + unsigned int getProgramNumber() const; + + cl_device_type filterDeviceType( cl_device_type device_type ) const; + +#if defined(USE_ITT) + __itt_domain* ittDomain() const; + + void ittInit(); + + void ittCallLoggingEnter( + const std::string& functionName, + const cl_kernel kernel ); + void ittCallLoggingExit(); + + void ittRegisterCommandQueue( + cl_command_queue queue, + bool supportsPerfCounters ); + void ittReleaseCommandQueue( + cl_command_queue ); + void ittTraceEvent( + const std::string& name, + cl_event event, + uint64_t queuedTime ); +#endif + + void 
chromeCallLoggingExit( + const std::string& functionName, + const cl_kernel kernel, + uint64_t start, + uint64_t end ); + void chromeRegisterCommandQueue( + cl_command_queue queue ); + void chromeTraceEvent( + const std::string& name, + cl_event event, + uint64_t queuedTime ); + +private: + static const char* sc_URL; + static const char* sc_DumpDirectoryName; + static const char* sc_ReportFileName; + static const char* sc_LogFileName; + static const char* sc_TraceFileName; + static const char* sc_DumpPerfCountersFileNamePrefix; + +#if defined(CLINTERCEPT_CMAKE) + static const char* sc_GitDescribe; + static const char* sc_GitRefSpec; + static const char* sc_GitHash; +#endif + + CLIntercept( void* pGlobalData ); + ~CLIntercept(); + + bool init(); + void log(const std::string& s); + void logf(const char* str, ...); + + void logPlatformInfo( cl_platform_id platform ); + void logDeviceInfo( cl_device_id device ); + +#if defined(_WIN32) || defined(__linux__) + bool initDispatch( const std::string& dllName ); +#elif defined(__APPLE__) + bool initDispatch( void ); +#else +#error Unknown OS! 
+#endif + + void getCallLoggingPrefix( + std::string& str ); + + OS::Services m_OS; + CLdispatch m_Dispatch; + CEnumNameMap m_EnumNameMap; + + void* m_OpenCLLibraryHandle; + + std::ofstream m_InterceptLog; + std::ofstream m_InterceptTrace; + + bool m_LoggedCLInfo; + + uint64_t m_EnqueueCounter; + uint64_t m_StartTime; + + typedef std::map< uint64_t, unsigned int> CThreadNumberMap; + CThreadNumberMap m_ThreadNumberMap; + + unsigned int m_ProgramNumber; + + typedef std::map< const cl_program, unsigned int > CProgramNumberMap; + CProgramNumberMap m_ProgramNumberMap; + + typedef std::map< const cl_program, uint64_t > CProgramHashMap; + CProgramHashMap m_ProgramHashMap; + + typedef std::map< unsigned int, unsigned int > CProgramNumberCompileCountMap; + CProgramNumberCompileCountMap m_ProgramNumberCompileCountMap; + + struct SCpuTimingStats + { + uint64_t NumberOfCalls; + uint64_t MinTicks; + uint64_t MaxTicks; + uint64_t TotalTicks; + }; + + typedef std::map< std::string, SCpuTimingStats* > CCpuTimingStatsMap; + CCpuTimingStatsMap m_CpuTimingStatsMap; + + struct SDeviceTimingStats + { + uint64_t NumberOfCalls; + cl_ulong MinNS; + cl_ulong MaxNS; + cl_ulong TotalNS; + }; + + typedef std::map< std::string, SDeviceTimingStats* > CDeviceTimingStatsMap; + CDeviceTimingStatsMap m_DeviceTimingStatsMap; + + typedef std::map< const cl_kernel, std::string > CKernelNameMap; + CKernelNameMap m_KernelNameMap; + + struct SEventListNode + { + std::string FunctionName; + std::string KernelName; + uint64_t QueuedTime; + cl_kernel Kernel; + cl_event Event; + }; + + typedef std::list< SEventListNode* > CEventList; + CEventList m_EventList; + +#if defined(USE_MDAPI) + TimingProfile m_DeviceTimingProfile; + + typedef std::pair< std::string, char* > CMDDataEntry; + typedef std::queue< CMDDataEntry* > CMDDataList; + CMDDataList m_MDDataList; + + void saveMDAPICounters( + const std::string& name, + const cl_event event ); + void reportMDAPICounters( + std::ofstream& os ); +#endif + + unsigned 
int m_MemAllocNumber; + + typedef std::map< const void*, unsigned int > CMemAllocNumberMap; + CMemAllocNumberMap m_MemAllocNumberMap; + + typedef std::map< cl_sampler, std::string > CSamplerDataMap; + CSamplerDataMap m_SamplerDataMap; + + typedef std::map< const cl_mem, size_t > CBufferInfoMap; + CBufferInfoMap m_BufferInfoMap; + + typedef std::map< const void*, size_t > CSVMAllocInfoMap; + CSVMAllocInfoMap m_SVMAllocInfoMap; + + struct SImageInfo + { + size_t Region[3]; + size_t ElementSize; + }; + + typedef std::map< const cl_mem, SImageInfo > CImageInfoMap; + CImageInfoMap m_ImageInfoMap; + + typedef std::map< cl_uint, const void* > CKernelArgMemMap; + typedef std::map< const cl_kernel, CKernelArgMemMap > CKernelArgMap; + CKernelArgMap m_KernelArgMap; + + bool m_AubCaptureStarted; + cl_uint m_AubCaptureKernelEnqueueSkipCounter; + cl_uint m_AubCaptureKernelEnqueueCaptureCounter; + + typedef std::set CAubCaptureSet; + CAubCaptureSet m_AubCaptureSet; + + typedef std::map< const cl_context, SContextCallbackInfo* > CContextCallbackInfoMap; + CContextCallbackInfoMap m_ContextCallbackInfoMap; + + typedef std::map< const cl_event, SEventCallbackInfo* > CEventCallbackInfoMap; + CEventCallbackInfoMap m_EventCallbackInfoMap; + + struct SPrecompiledKernelOverrides + { + cl_program Program; + + cl_kernel Kernel_CopyBufferBytes; + cl_kernel Kernel_CopyBufferUInts; + cl_kernel Kernel_CopyBufferUInt4s; + cl_kernel Kernel_CopyBufferUInt16s; + + cl_kernel Kernel_CopyImage2Dto2DFloat; + cl_kernel Kernel_CopyImage2Dto2DInt; + cl_kernel Kernel_CopyImage2Dto2DUInt; + }; + + typedef std::map< const cl_context, SPrecompiledKernelOverrides* > CPrecompiledKernelOverridesMap; + CPrecompiledKernelOverridesMap m_PrecompiledKernelOverridesMap; + + struct SBuiltinKernelOverrides + { + cl_program Program; + + cl_kernel Kernel_block_motion_estimate_intel; + }; + + typedef std::map< const cl_context, SBuiltinKernelOverrides* > CBuiltinKernelOverridesMap; + CBuiltinKernelOverridesMap 
m_BuiltinKernelOverridesMap; + + struct SSIMDSurveyProgram + { + cl_program SIMD8Program; + cl_program SIMD16Program; + cl_program SIMD32Program; + }; + struct SSIMDSurveyKernel + { + cl_kernel SIMD8Kernel; + cl_kernel SIMD16Kernel; + cl_kernel SIMD32Kernel; + + cl_ulong SIMD8ExecutionTimeNS; + cl_ulong SIMD16ExecutionTimeNS; + cl_ulong SIMD32ExecutionTimeNS; + + uint32_t ExecutionNumber; + }; + + typedef std::map< const cl_program, SSIMDSurveyProgram* > CSIMDSurveyProgramMap; + CSIMDSurveyProgramMap m_SIMDSurveyProgramMap; + + typedef std::map< const cl_kernel, SSIMDSurveyKernel* > CSIMDSurveyKernelMap; + CSIMDSurveyKernelMap m_SIMDSurveyKernelMap; + + struct Config + { +#define CLI_CONTROL( _type, _name, _init, _desc ) _type _name; +#include "controls.h" +#undef CLI_CONTROL + } m_Config; + +#if defined(USE_ITT) + bool m_ITTInitialized; + + __itt_domain* m_ITTDomain; + + //__ittx_task_state* m_ITTQueuedState; + //__ittx_task_state* m_ITTSubmittedState; + //__ittx_task_state* m_ITTExecutingState; + + //__itt_track_group* m_ITTQueueTrackGroup; + + static void ITTAPI ittClockInfoCallback( + __itt_clock_info* pClockInfo, + void* pData ); + + struct SITTQueueInfo + { + const CLIntercept* pIntercept; + bool SupportsPerfCounters; + + __itt_track* itt_track; + __itt_clock_domain* itt_clock_domain; + uint64_t CPUReferenceTime; + cl_ulong CLReferenceTime; + }; + + typedef std::map< cl_command_queue, SITTQueueInfo* > CITTQueueInfoMap; + CITTQueueInfoMap m_ITTQueueInfoMap; +#endif + + DISALLOW_COPY_AND_ASSIGN( CLIntercept ); +}; + +/////////////////////////////////////////////////////////////////////////////// +// +inline const CLdispatch& CLIntercept::dispatch() const +{ + return m_Dispatch; +} + +/////////////////////////////////////////////////////////////////////////////// +// +inline const OS::Services& CLIntercept::OS() const +{ + return m_OS; +} + +/////////////////////////////////////////////////////////////////////////////// +// +inline const CEnumNameMap& 
CLIntercept::enumName() const +{ + return m_EnumNameMap; +} + +/////////////////////////////////////////////////////////////////////////////// +// +inline const CLIntercept::Config& CLIntercept::config() const +{ + return m_Config; +} + +/////////////////////////////////////////////////////////////////////////////// +// +#define LOG_CLINFO() \ + if( pIntercept->config().CLInfoLogging ) \ + { \ + pIntercept->logCLInfo(); \ + } + +/////////////////////////////////////////////////////////////////////////////// +// +#define BUILD_LOGGING_INIT() \ + uint64_t buildTimeStart = 0; \ + if( pIntercept->config().BuildLogging ) \ + { \ + buildTimeStart = pIntercept->OS().GetTimer(); \ + } + +#define BUILD_LOGGING( program, num_devices, device_list ) \ + if( pIntercept->config().BuildLogging ) \ + { \ + pIntercept->logBuild( \ + buildTimeStart, \ + program, \ + num_devices, \ + device_list ); \ + } + +/////////////////////////////////////////////////////////////////////////////// +// +inline bool CLIntercept::callLogging() const +{ + return m_Config.CallLogging; +} + +#define CALL_LOGGING_ENTER(...) \ + if( pIntercept->callLogging() ) \ + { \ + pIntercept->callLoggingEnter( __FUNCTION__, NULL, ##__VA_ARGS__ ); \ + } \ + ITT_CALL_LOGGING_ENTER( NULL ); + +#define CALL_LOGGING_ENTER_KERNEL(kernel, ...) \ + if( pIntercept->callLogging() ) \ + { \ + pIntercept->callLoggingEnter( __FUNCTION__, kernel, ##__VA_ARGS__ );\ + } \ + ITT_CALL_LOGGING_ENTER( kernel ); + +#define CALL_LOGGING_INFO(...) \ + if( pIntercept->callLogging() ) \ + { \ + pIntercept->callLoggingInfo( __VA_ARGS__ ); \ + } \ + +#define CALL_LOGGING_EXIT(...) \ + if( pIntercept->callLogging() ) \ + { \ + pIntercept->callLoggingExit( __FUNCTION__, NULL, NULL, ##__VA_ARGS__ ); \ + } \ + ITT_CALL_LOGGING_EXIT(); + +#define CALL_LOGGING_EXIT_EVENT(event, ...) 
\ + if( pIntercept->callLogging() ) \ + { \ + pIntercept->callLoggingExit( __FUNCTION__, NULL, event, ##__VA_ARGS__ );\ + } \ + ITT_CALL_LOGGING_EXIT(); + +/////////////////////////////////////////////////////////////////////////////// +// +#define CHECK_ERROR_INIT( pErrorCode ) \ + cl_int localErrorCode = CL_SUCCESS; \ + if( ( pIntercept->config().ErrorLogging || \ + pIntercept->config().ErrorAssert || \ + pIntercept->config().NoErrors ) && \ + ( pErrorCode == NULL ) ) \ + { \ + pErrorCode = &localErrorCode; \ + } + +#define CHECK_ERROR( errorCode ) \ + if( ( pIntercept->config().ErrorLogging || \ + pIntercept->config().ErrorAssert || \ + pIntercept->config().NoErrors ) && \ + ( errorCode != CL_SUCCESS ) ) \ + { \ + if( pIntercept->config().ErrorLogging ) \ + { \ + pIntercept->logError( __FUNCTION__, errorCode ); \ + } \ + if( pIntercept->config().ErrorAssert ) \ + { \ + CLI_DEBUG_BREAK(); \ + } \ + if( pIntercept->config().NoErrors ) \ + { \ + errorCode = CL_SUCCESS; \ + } \ + } + +/////////////////////////////////////////////////////////////////////////////// +// +#define CREATE_CONTEXT_OVERRIDE_INIT( _props, _func, _data, _newprops ) \ + CLIntercept::SContextCallbackInfo* pContextCallbackInfo = NULL; \ + if( pIntercept->config().ContextCallbackLogging ) \ + { \ + pIntercept->contextCallbackOverrideInit( \ + _props, \ + _func, \ + _data, \ + pContextCallbackInfo, \ + _newprops ); \ + } + +#define CREATE_CONTEXT_OVERRIDE_CLEANUP( _context, _newprops ) \ + if( pIntercept->config().ContextCallbackLogging ) \ + { \ + pIntercept->contextCallbackOverrideCleanup( \ + _context, \ + pContextCallbackInfo, \ + _newprops ); \ + } + +/////////////////////////////////////////////////////////////////////////////// +// +#define EVENT_CALLBACK_OVERRIDE_INIT( _func, _data ) \ + CLIntercept::SEventCallbackInfo* pEventCallbackInfo = NULL; \ + if( pIntercept->config().EventCallbackLogging ) \ + { \ + pEventCallbackInfo = new CLIntercept::SEventCallbackInfo; \ + if( 
pEventCallbackInfo ) \ + { \ + pEventCallbackInfo->pIntercept = pIntercept; \ + pEventCallbackInfo->pApplicationCallback = _func; \ + pEventCallbackInfo->pUserData = _data; \ + \ + _func = CLIntercept::eventCallbackCaller; \ + _data = pEventCallbackInfo; \ + } \ + } + +#define EVENT_CALLBACK_OVERRIDE_CLEANUP( _errCode ) \ + if( pIntercept->config().EventCallbackLogging && \ + _errCode != CL_SUCCESS ) \ + { \ + delete pEventCallbackInfo; \ + pEventCallbackInfo = NULL; \ + } + +/////////////////////////////////////////////////////////////////////////////// +// +#define FINISH_OR_FLUSH_AFTER_ENQUEUE( _command_queue ) \ + pIntercept->incrementEnqueueCounter(); \ + if( pIntercept->config().FinishAfterEnqueue ) \ + { \ + pIntercept->logFlushOrFinishAfterEnqueueStart( \ + "clFinish", \ + __FUNCTION__ ); \ + cl_int e = pIntercept->dispatch().clFinish( _command_queue ); \ + pIntercept->logFlushOrFinishAfterEnqueueEnd( \ + "clFinish", \ + __FUNCTION__, \ + e ); \ + pIntercept->checkTimingEvents(); \ + } \ + else if( pIntercept->config().FlushAfterEnqueue ) \ + { \ + /*pIntercept->logFlushOrFinishAfterEnqueueStart(*/ \ + /* "clFlush", */ \ + /* __FUNCTION__ ); */ \ + /* cl_int e = */ pIntercept->dispatch().clFlush( _command_queue ); \ + /*pIntercept->logFlushOrFinishAfterEnqueueEnd(*/ \ + /* "clFlush", */ \ + /* __FUNCTION__, */ \ + /* e ); */ \ + } + +#define FLUSH_AFTER_ENQUEUE_BARRIER( _command_queue ) \ + if( pIntercept->config().FlushAfterEnqueueBarrier ) \ + { \ + /*pIntercept->logFlushOrFinishAfterEnqueueStart(*/ \ + /* "clFlush (for barrier)", */ \ + /* __FUNCTION__ ); */ \ + /* cl_int e = */ pIntercept->dispatch().clFlush( _command_queue ); \ + /*pIntercept->logFlushOrFinishAfterEnqueueEnd(*/ \ + /* "clFlush (for barrier)", */ \ + /* __FUNCTION__, */ \ + /* e ); */ \ + } + +/////////////////////////////////////////////////////////////////////////////// +// +inline bool CLIntercept::nullEnqueue() const +{ + return m_Config.NullEnqueue; +} + 
+/////////////////////////////////////////////////////////////////////////////// +// +inline bool CLIntercept::dumpBufferForKernel( const cl_kernel kernel ) +{ + return m_Config.DumpBuffersForKernel.empty() || + m_KernelNameMap[ kernel ] == m_Config.DumpBuffersForKernel; +} + +inline bool CLIntercept::dumpImagesForKernel( const cl_kernel kernel ) +{ + return m_Config.DumpImagesForKernel.empty() || + m_KernelNameMap[ kernel ] == m_Config.DumpImagesForKernel; +} + +inline bool CLIntercept::checkDumpBufferEnqueueLimits() const +{ + return ( m_EnqueueCounter >= m_Config.DumpBuffersMinEnqueue ) && + ( m_EnqueueCounter <= m_Config.DumpBuffersMaxEnqueue ); +} + +inline bool CLIntercept::checkDumpImageEnqueueLimits() const +{ + return ( m_EnqueueCounter >= m_Config.DumpImagesMinEnqueue ) && + ( m_EnqueueCounter <= m_Config.DumpImagesMaxEnqueue ); +} + +#define ADD_BUFFER( buffer ) \ + if( buffer && \ + ( pIntercept->config().DumpBuffersAfterCreate || \ + pIntercept->config().DumpBuffersAfterMap || \ + pIntercept->config().DumpBuffersBeforeUnmap || \ + pIntercept->config().DumpBuffersBeforeEnqueue || \ + pIntercept->config().DumpBuffersAfterEnqueue ) ) \ + { \ + pIntercept->addBuffer( buffer ); \ + } + +#define ADD_IMAGE( image ) \ + if( image && \ + ( pIntercept->config().DumpImagesBeforeEnqueue || \ + pIntercept->config().DumpImagesAfterEnqueue ) ) \ + { \ + pIntercept->addImage( image ); \ + } + +#define REMOVE_MEMOBJ( memobj ) \ + if( memobj && \ + ( pIntercept->config().DumpBuffersAfterCreate || \ + pIntercept->config().DumpBuffersAfterMap || \ + pIntercept->config().DumpBuffersBeforeUnmap || \ + pIntercept->config().DumpBuffersBeforeEnqueue || \ + pIntercept->config().DumpBuffersAfterEnqueue || \ + pIntercept->config().DumpImagesBeforeEnqueue || \ + pIntercept->config().DumpImagesAfterEnqueue ) ) \ + { \ + pIntercept->removeMemObj( memobj ); \ + } + +#define ADD_SAMPLER( sampler, str ) \ + if( sampler && \ + pIntercept->callLogging() ) \ + { \ + 
pIntercept->addSampler( sampler, str ); \ + } + +#define REMOVE_SAMPLER( sampler ) \ + if( sampler && \ + pIntercept->callLogging() ) \ + { \ + pIntercept->removeSampler( sampler ); \ + } + +#define ADD_SVM_ALLOCATION( svmPtr, size ) \ + if( svmPtr && \ + ( pIntercept->config().DumpBuffersBeforeEnqueue || \ + pIntercept->config().DumpBuffersAfterEnqueue ) ) \ + { \ + pIntercept->addSVMAllocation( svmPtr, size ); \ + } + +#define REMOVE_SVM_ALLOCATION( svmPtr ) \ + if( svmPtr && \ + ( pIntercept->config().DumpBuffersBeforeEnqueue || \ + pIntercept->config().DumpBuffersAfterEnqueue ) ) \ + { \ + pIntercept->removeSVMAllocation( svmPtr ); \ + } + +#define SET_KERNEL_ARG( kernel, arg_index, arg_size, arg_value ) \ + if( ( pIntercept->config().DumpBuffersBeforeEnqueue || \ + pIntercept->config().DumpBuffersAfterEnqueue || \ + pIntercept->config().DumpImagesBeforeEnqueue || \ + pIntercept->config().DumpImagesAfterEnqueue ) && \ + ( arg_value != NULL ) && \ + ( arg_size == sizeof(cl_mem) ) ) \ + { \ + cl_mem* pMem = (cl_mem*)arg_value; \ + pIntercept->setKernelArg( kernel, arg_index, pMem[0] ); \ + } + +#define SET_KERNEL_ARG_SVM_POINTER( kernel, arg_index, arg_value ) \ + if( pIntercept->config().DumpBuffersBeforeEnqueue || \ + pIntercept->config().DumpBuffersAfterEnqueue ) \ + { \ + pIntercept->setKernelArgSVMPointer( kernel, arg_index, arg_value ); \ + } + +#define INITIALIZE_BUFFER_CONTENTS_INIT( _flags, _size, _ptr ) \ + char* zeroData = NULL; \ + if( pIntercept->config().InitializeBuffers && \ + _ptr == NULL && \ + !( _flags & ( CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR ) ) ) \ + { \ + zeroData = new char[ _size ]; \ + if( zeroData != NULL ) \ + { \ + memset( zeroData, 0, _size ); \ + _ptr = zeroData; \ + _flags |= (cl_mem_flags)CL_MEM_COPY_HOST_PTR; \ + } \ + } + +// Note: The cleanup setup currently does not reset the flags or host pointer. 
+// This mostly means that initialized buffers may be dumped after creation, +// whereas if the flags were reset then the dump buffer after create step +// would not be triggered. +#define INITIALIZE_BUFFER_CONTENTS_CLEANUP( _flags, _ptr ) \ + if( zeroData != NULL ) \ + { \ + delete [] zeroData; \ + zeroData = NULL; \ + } + + +#define DUMP_BUFFER_AFTER_CREATE( memobj, flags, ptr, size ) \ + if( memobj && \ + ( flags & ( CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR ) ) && \ + pIntercept->checkDumpBufferEnqueueLimits() && \ + pIntercept->config().DumpBuffersAfterCreate ) \ + { \ + pIntercept->dumpBuffer( "Create", memobj, NULL, ptr, 0, size ); \ + } + +#define DUMP_BUFFER_AFTER_MAP( command_queue, memobj, blocking_map, flags, ptr, offset, size ) \ + if( memobj && \ + !( flags & CL_MAP_WRITE_INVALIDATE_REGION ) && \ + pIntercept->checkDumpBufferEnqueueLimits() && \ + pIntercept->config().DumpBuffersAfterMap ) \ + { \ + if( blocking_map == false ) \ + { \ + pIntercept->dispatch().clFinish( command_queue ); \ + } \ + pIntercept->dumpBuffer( "Map", memobj, NULL, ptr, offset, size ); \ + } + +#define DUMP_BUFFER_BEFORE_UNMAP( memobj, command_queue) \ + if( memobj && \ + command_queue && \ + pIntercept->checkDumpBufferEnqueueLimits() && \ + pIntercept->config().DumpBuffersBeforeUnmap ) \ + { \ + pIntercept->dumpBuffer( "Unmap", memobj, command_queue, NULL, 0, 0 );\ + } + +#define DUMP_BUFFERS_BEFORE_ENQUEUE( kernel, command_queue ) \ + if( pIntercept->checkDumpBufferEnqueueLimits() && \ + pIntercept->config().DumpBuffersBeforeEnqueue && \ + pIntercept->dumpBufferForKernel( kernel ) ) \ + { \ + pIntercept->dumpBuffersForKernel( "Pre", kernel, command_queue ); \ + } + +#define DUMP_BUFFERS_AFTER_ENQUEUE( kernel, command_queue ) \ + if( pIntercept->checkDumpBufferEnqueueLimits() && \ + pIntercept->config().DumpBuffersAfterEnqueue && \ + pIntercept->dumpBufferForKernel( kernel ) ) \ + { \ + pIntercept->dumpBuffersForKernel( "Post", kernel, command_queue ); \ + } + +#define 
DUMP_IMAGES_BEFORE_ENQUEUE( kernel, command_queue ) \ + if( pIntercept->checkDumpImageEnqueueLimits() && \ + pIntercept->config().DumpImagesBeforeEnqueue && \ + pIntercept->dumpImagesForKernel( kernel ) ) \ + { \ + pIntercept->dumpImagesForKernel( "Pre", kernel, command_queue ); \ + } + +#define DUMP_IMAGES_AFTER_ENQUEUE( kernel, command_queue ) \ + if( pIntercept->checkDumpImageEnqueueLimits() && \ + pIntercept->config().DumpImagesAfterEnqueue && \ + pIntercept->dumpImagesForKernel( kernel ) ) \ + { \ + pIntercept->dumpImagesForKernel( "Post", kernel, command_queue ); \ + } + +/////////////////////////////////////////////////////////////////////////////// +// +inline bool CLIntercept::checkAubCaptureEnqueueLimits() const +{ + return ( m_EnqueueCounter >= m_Config.AubCaptureMinEnqueue ) && + ( m_EnqueueCounter <= m_Config.AubCaptureMaxEnqueue ); +} + +// Note: We do not individually aub capture non-kernel enqueues at the moment. +#define CHECK_AUBCAPTURE_START( command_queue ) \ + if( pIntercept->config().AubCapture && \ + pIntercept->checkAubCaptureEnqueueLimits() && \ + !pIntercept->config().AubCaptureIndividualEnqueues ) \ + { \ + pIntercept->startAubCapture( \ + __FUNCTION__, NULL, 0, NULL, NULL, command_queue ); \ + } + +#define CHECK_AUBCAPTURE_START_KERNEL( kernel, wd, gws, lws, command_queue )\ + if( pIntercept->config().AubCapture && \ + pIntercept->checkAubCaptureEnqueueLimits() && \ + pIntercept->checkAubCaptureKernelSignature( kernel, wd, gws, lws ) )\ + { \ + pIntercept->startAubCapture( \ + __FUNCTION__, kernel, wd, gws, lws, command_queue ); \ + } + +#define CHECK_AUBCAPTURE_STOP( command_queue ) \ + if( pIntercept->config().AubCapture && \ + ( pIntercept->config().AubCaptureIndividualEnqueues || \ + !pIntercept->checkAubCaptureEnqueueLimits() ) ) \ + { \ + pIntercept->stopAubCapture( command_queue ); \ + } + +/////////////////////////////////////////////////////////////////////////////// +// + +// Shared: + +#define SAVE_PROGRAM_HASH( program, hash 
) \ + if( pIntercept->config().DevicePerformanceTimeHashTracking || \ + pIntercept->config().DumpProgramSource || \ + pIntercept->config().DumpInputProgramBinaries || \ + pIntercept->config().DumpProgramBinaries || \ + pIntercept->config().DumpProgramSPIRV || \ + pIntercept->config().DumpProgramBuildLogs || \ + pIntercept->config().InjectProgramSource || \ + pIntercept->config().AutoCreateSPIRV || \ + pIntercept->config().AubCaptureUniqueKernels ) \ + { \ + pIntercept->saveProgramHash( program, hash ); \ + } + +// Called from clCreateProgramWithSource: + +#define CREATE_COMBINED_PROGRAM_STRING( count, strings, lengths, singleString, hash ) \ + if( pIntercept->config().DevicePerformanceTimeHashTracking || \ + pIntercept->config().SimpleDumpProgramSource || \ + pIntercept->config().DumpProgramSourceScript || \ + pIntercept->config().DumpProgramSource || \ + pIntercept->config().DumpProgramBinaries || \ + pIntercept->config().DumpProgramSPIRV || \ + pIntercept->config().DumpProgramBuildLogs || \ + pIntercept->config().InjectProgramSource || \ + pIntercept->config().InjectProgramBinaries || \ + pIntercept->config().PrependProgramSource || \ + pIntercept->config().AutoCreateSPIRV || \ + pIntercept->config().AubCaptureUniqueKernels ) \ + { \ + pIntercept->combineProgramStrings( \ + count, \ + strings, \ + lengths, \ + singleString ); \ + hash = pIntercept->hashString( \ + singleString, \ + strlen( singleString ) ); \ + } + +#define INJECT_PROGRAM_SOURCE( count, strings, lengths, singleString, hash ) \ + bool injected = false; \ + if( pIntercept->config().InjectProgramSource ) \ + { \ + injected = pIntercept->injectProgramSource( \ + hash, \ + count, \ + strings, \ + lengths, \ + singleString ); \ + } + +#define PREPEND_PROGRAM_SOURCE( count, strings, lengths, singleString, hash ) \ + if( pIntercept->config().PrependProgramSource ) \ + { \ + injected |= pIntercept->prependProgramSource( \ + hash, \ + count, \ + strings, \ + lengths, \ + singleString ); \ + } + +#define 
DUMP_PROGRAM_SOURCE( program, singleString, hash ) \ + if( ( injected == false ) && \ + ( pIntercept->config().DumpProgramSource || \ + pIntercept->config().AutoCreateSPIRV ) ) \ + { \ + pIntercept->dumpProgramSource( hash, program, singleString ); \ + } \ + else if( ( injected == false ) && \ + ( pIntercept->config().SimpleDumpProgramSource || \ + pIntercept->config().DumpProgramSourceScript ) ) \ + { \ + pIntercept->dumpProgramSourceScript( program, singleString ); \ + } \ + else \ + { \ + pIntercept->saveProgramNumber( program ); \ + } + +#define DELETE_COMBINED_PROGRAM_STRING( singleString ) \ + delete [] singleString; \ + singleString = NULL; + +// Called from clCreateProgramWithBinary: + +// Note: This does not currently combine program binaries before computing +// the hash. This will work fine for single-device binaries, but may be +// incomplete or incorrect for multi-device binaries. +#define COMPUTE_BINARY_HASH( _num, _lengths, _binaries, _hash ) \ + if( _lengths && _binaries && \ + ( pIntercept->config().DumpInputProgramBinaries || \ + pIntercept->config().DumpProgramBinaries ) ) \ + { \ + _hash = pIntercept->hashString( \ + (const char*)_binaries[0], \ + _lengths[0] ); \ + } + +#define DUMP_INPUT_PROGRAM_BINARIES( _program, _num, _devs, _lengths, _binaries, _hash ) \ + if( pIntercept->config().DumpInputProgramBinaries ) \ + { \ + pIntercept->dumpInputProgramBinaries( \ + _hash, \ + _program, \ + _num, \ + _devs, \ + _lengths, \ + _binaries ); \ + } + +// Called from clCreateProgramWithIL: + +#define COMPUTE_SPIRV_HASH( _length, _il, _hash ) \ + if( _length && _il && pIntercept->config().DumpProgramSPIRV ) \ + { \ + _hash = pIntercept->hashString( \ + (const char*)_il, \ + _length ); \ + } + +#define INJECT_PROGRAM_SPIRV( _length, _il, _injectedSPIRV, _hash ) \ + bool injected = false; \ + if( pIntercept->config().InjectProgramSPIRV ) \ + { \ + injected = pIntercept->injectProgramSPIRV( \ + _hash, \ + _length, \ + _il, \ + _injectedSPIRV ); \ + } + 
// NOTE: Every macro in this section expands to statements that reference a
// CLIntercept pointer named "pIntercept", which must be in scope wherever the
// macro is used.  Several macros additionally declare, or rely on, local
// variables at the expansion site; those are called out individually below.
// Because of that, the macros are deliberately NOT wrapped in do/while(0).
// Macro parameters are parenthesized wherever they are used as operands so
// that expression arguments bind as the caller intended.

// Dumps the SPIR-V for a program when DumpProgramSPIRV is enabled and the
// program was not injected; otherwise only records the program number.
// Relies on a bool named "injected" being in scope at the expansion site.
#define DUMP_PROGRAM_SPIRV( program, length, il, hash )                     \
    if( ( injected == false ) &&                                            \
        pIntercept->config().DumpProgramSPIRV )                             \
    {                                                                       \
        pIntercept->dumpProgramSPIRV( hash, program, length, il );          \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        pIntercept->saveProgramNumber( program );                           \
    }

// Frees an array allocated with new[] and resets the pointer to NULL.
// The argument must be a modifiable pointer lvalue.
#define DELETE_INJECTED_SPIRV( _injectedSPIRV )                             \
    delete [] ( _injectedSPIRV );                                           \
    ( _injectedSPIRV ) = NULL;

// Called from clBuildProgram:

// Declares a bool named "modified" at the expansion site and sets it when
// either injectProgramOptions() or appendBuildOptions() reports a change.
// "modified" is later read by DUMP_PROGRAM_OPTIONS.
#define MODIFY_PROGRAM_OPTIONS( program, options, newOptions )              \
    bool    modified = false;                                               \
    if( pIntercept->config().InjectProgramSource )                          \
    {                                                                       \
        modified |= pIntercept->injectProgramOptions(                       \
            program,                                                        \
            options,                                                        \
            newOptions );                                                   \
    }                                                                       \
    if( !pIntercept->config().AppendBuildOptions.empty() )                  \
    {                                                                       \
        modified |= pIntercept->appendBuildOptions(                         \
            options,                                                        \
            newOptions );                                                   \
    }

// Dumps the program build options, but only when they were not modified.
// The regular dump takes precedence over the "script" dump.
// Relies on the bool "modified" declared by MODIFY_PROGRAM_OPTIONS.
#define DUMP_PROGRAM_OPTIONS( program, options )                            \
    if( ( modified == false ) &&                                            \
        ( pIntercept->config().DumpProgramSource ||                         \
          pIntercept->config().DumpProgramBinaries ||                       \
          pIntercept->config().DumpProgramSPIRV ) )                         \
    {                                                                       \
        pIntercept->dumpProgramOptions( program, options );                 \
    }                                                                       \
    else if( ( modified == false ) &&                                       \
             ( pIntercept->config().SimpleDumpProgramSource ||              \
               pIntercept->config().DumpProgramSourceScript ) )             \
    {                                                                       \
        pIntercept->dumpProgramOptionsScript( program, options );           \
    }

// Dumps the program binary when DumpProgramBinaries is enabled.
#define DUMP_OUTPUT_PROGRAM_BINARIES( program )                             \
    if( pIntercept->config().DumpProgramBinaries )                          \
    {                                                                       \
        pIntercept->dumpProgramBinary( program );                           \
    }

// Calls autoCreateSPIRV() for a non-NULL program when AutoCreateSPIRV is
// enabled.
#define AUTO_CREATE_SPIRV( _program, _options )                             \
    if( ( _program ) && pIntercept->config().AutoCreateSPIRV )              \
    {                                                                       \
        pIntercept->autoCreateSPIRV( _program, _options );                  \
    }

// Increments the compile count for a non-NULL program when any of the
// listed controls is enabled.
#define INCREMENT_PROGRAM_COMPILE_COUNT( _program )                         \
    if( ( _program ) &&                                                     \
        ( pIntercept->config().BuildLogging ||                              \
          pIntercept->config().DevicePerformanceTimeHashTracking ||         \
          pIntercept->config().InjectProgramSource ||                       \
          pIntercept->config().DumpProgramSourceScript ||                   \
          pIntercept->config().DumpProgramSource ||                         \
          pIntercept->config().DumpProgramBinaries ||                       \
          pIntercept->config().DumpProgramSPIRV ||                          \
          pIntercept->config().DumpProgramBuildLogs ||                      \
          pIntercept->config().AutoCreateSPIRV ||                           \
          pIntercept->config().AubCaptureUniqueKernels ) )                  \
    {                                                                       \
        pIntercept->incrementProgramCompileCount( _program );               \
    }

// Frees an array allocated with new[] and resets the pointer to NULL.
// The argument must be a modifiable pointer lvalue.
#define DELETE_MODIFIED_OPTIONS( newOptions )                               \
    delete [] ( newOptions );                                               \
    ( newOptions ) = NULL;

///////////////////////////////////////////////////////////////////////////////
//
// Initializes the precompiled kernel overrides for a non-NULL context when
// any of the buffer or image override controls is enabled.
#define INIT_PRECOMPILED_KERNEL_OVERRIDES( context )                        \
    if( ( ( context ) != NULL ) &&                                          \
        ( pIntercept->config().OverrideReadBuffer ||                        \
          pIntercept->config().OverrideWriteBuffer ||                       \
          pIntercept->config().OverrideCopyBuffer ||                        \
          pIntercept->config().OverrideReadImage ||                         \
          pIntercept->config().OverrideWriteImage ||                        \
          pIntercept->config().OverrideCopyImage ) )                        \
    {                                                                       \
        pIntercept->initPrecompiledKernelOverrides( context );              \
    }

///////////////////////////////////////////////////////////////////////////////
//
// Initializes the built-in kernel overrides for a non-NULL context when
// OverrideBuiltinKernels is enabled.
#define INIT_BUILTIN_KERNEL_OVERRIDES( context )                            \
    if( ( ( context ) != NULL ) &&                                          \
        pIntercept->config().OverrideBuiltinKernels )                       \
    {                                                                       \
        pIntercept->initBuiltinKernelOverrides( context );                  \
    }

///////////////////////////////////////////////////////////////////////////////
//
// Declares "cpuStart" and "cpuEnd" at the expansion site and samples the
// start time when host timing or Chrome call logging is enabled.  Both
// variables are consumed by the CPU_PERFORMANCE_TIMING_END* macros, which
// must therefore be expanded in the same scope.
#define CPU_PERFORMANCE_TIMING_START()                                      \
    uint64_t    cpuStart = 0, cpuEnd = 0;                                   \
    if( pIntercept->config().HostPerformanceTiming ||                       \
        pIntercept->config().ChromeCallLogging )                            \
    {                                                                       \
        cpuStart = pIntercept->OS().GetTimer();                             \
    }

// Samples the end time and reports [cpuStart, cpuEnd] for the enclosing
// function (__FUNCTION__) to the host timing stats and/or the Chrome call
// log.  No kernel is associated with the call (NULL is passed).
#define CPU_PERFORMANCE_TIMING_END()                                        \
    if( pIntercept->config().HostPerformanceTiming ||                       \
        pIntercept->config().ChromeCallLogging )                            \
    {                                                                       \
        cpuEnd = pIntercept->OS().GetTimer();                               \
        if( pIntercept->config().HostPerformanceTiming )                    \
        {                                                                   \
            pIntercept->updateHostTimingStats(                              \
                __FUNCTION__,                                               \
                NULL,                                                       \
                cpuStart,                                                   \
                cpuEnd );                                                   \
        }                                                                   \
        if( pIntercept->config().ChromeCallLogging )                        \
        {                                                                   \
            pIntercept->chromeCallLoggingExit(                              \
                __FUNCTION__,                                               \
                NULL,                                                       \
                cpuStart,                                                   \
                cpuEnd );                                                   \
        }                                                                   \
    }

// Same as CPU_PERFORMANCE_TIMING_END, but passes _kernel along with the
// timing data.
#define CPU_PERFORMANCE_TIMING_END_KERNEL( _kernel )                        \
    if( pIntercept->config().HostPerformanceTiming ||                       \
        pIntercept->config().ChromeCallLogging )                            \
    {                                                                       \
        cpuEnd = pIntercept->OS().GetTimer();                               \
        if( pIntercept->config().HostPerformanceTiming )                    \
        {                                                                   \
            pIntercept->updateHostTimingStats(                              \
                __FUNCTION__,                                               \
                _kernel,                                                    \
                cpuStart,                                                   \
                cpuEnd );                                                   \
        }                                                                   \
        if( pIntercept->config().ChromeCallLogging )                        \
        {                                                                   \
            pIntercept->chromeCallLoggingExit(                              \
                __FUNCTION__,                                               \
                _kernel,                                                    \
                cpuStart,                                                   \
                cpuEnd );                                                   \
        }                                                                   \
    }

///////////////////////////////////////////////////////////////////////////////
//
// Calls createCommandQueueOverrideInit( _props, _newprops ) when any control
// that needs modified command queue properties is enabled.
#define CREATE_COMMAND_QUEUE_OVERRIDE_INIT( _props, _newprops )             \
    if( pIntercept->config().DevicePerformanceTiming ||                     \
        pIntercept->config().ITTPerformanceTiming ||                        \
        pIntercept->config().ChromePerformanceTiming ||                     \
        pIntercept->config().SIMDSurvey ||                                  \
        !pIntercept->config().DevicePerfCounterCustom.empty() ||            \
        pIntercept->config().InOrderQueue )                                 \
    {                                                                       \
        pIntercept->createCommandQueueOverrideInit(                         \
            _props,                                                         \
            _newprops );                                                    \
    }

// Counterpart of CREATE_COMMAND_QUEUE_OVERRIDE_INIT; it is guarded by the
// same set of controls.  The _queue parameter is currently unused and is
// kept only so the macro signature stays stable for callers.
#define CREATE_COMMAND_QUEUE_OVERRIDE_CLEANUP( _queue, _newprops )          \
    if( pIntercept->config().DevicePerformanceTiming ||                     \
        pIntercept->config().ITTPerformanceTiming ||                        \
        pIntercept->config().ChromePerformanceTiming ||                     \
        pIntercept->config().SIMDSurvey ||                                  \
        !pIntercept->config().DevicePerfCounterCustom.empty() ||            \
        pIntercept->config().InOrderQueue )                                 \
    {                                                                       \
        pIntercept->createCommandQueueOverrideCleanup(                      \
            _newprops );                                                    \
    }

// Declares "queuedTime", "local_event" and "retainAppEvent" at the expansion
// site.  When device timing is enabled, samples the queued time and, if the
// caller did not supply an event pointer, redirects pEvent to local_event
// and clears retainAppEvent.  pEvent must therefore be a modifiable lvalue of
// type cl_event*.
#define DEVICE_PERFORMANCE_TIMING_START( pEvent )                           \
    uint64_t    queuedTime = 0;                                             \
    cl_event    local_event = NULL;                                         \
    bool        retainAppEvent = true;                                      \
    if( pIntercept->config().DevicePerformanceTiming ||                     \
        pIntercept->config().ITTPerformanceTiming ||                        \
        pIntercept->config().ChromePerformanceTiming ||                     \
        pIntercept->config().SIMDSurvey ||                                  \
        !pIntercept->config().DevicePerfCounterCustom.empty() )             \
    {                                                                       \
        queuedTime = pIntercept->OS().GetTimer();                           \
        if( ( pEvent ) == NULL )                                            \
        {                                                                   \
            ( pEvent ) = &local_event;                                      \
            retainAppEvent = false;                                         \
        }                                                                   \
    }

// Hands the event for the enclosing API call (__FUNCTION__) to
// addTimingEvent().  Relies on the locals declared by
// DEVICE_PERFORMANCE_TIMING_START.
//   - If DevicePerformanceTimingSkipUnmap is set and the enclosing function
//     is clEnqueueUnmapMemObject, the event is not recorded; an event that
//     came from local_event is released and pEvent is reset to NULL.
//   - Otherwise the event is recorded; an application-provided event is
//     additionally retained, while for a local event pEvent is reset to NULL.
#define DEVICE_PERFORMANCE_TIMING_END( pEvent )                             \
    if( ( pIntercept->config().DevicePerformanceTiming ||                   \
          pIntercept->config().ITTPerformanceTiming ||                      \
          pIntercept->config().ChromePerformanceTiming ||                   \
          pIntercept->config().SIMDSurvey ||                                \
          !pIntercept->config().DevicePerfCounterCustom.empty() ) &&        \
        ( ( pEvent ) != NULL ) )                                            \
    {                                                                       \
        if( pIntercept->config().DevicePerformanceTimingSkipUnmap &&        \
            std::string(__FUNCTION__) == "clEnqueueUnmapMemObject" )        \
        {                                                                   \
            if( retainAppEvent == false )                                   \
            {                                                               \
                pIntercept->dispatch().clReleaseEvent( ( pEvent )[0] );     \
                ( pEvent ) = NULL;                                          \
            }                                                               \
        }                                                                   \
        else                                                                \
        {                                                                   \
            pIntercept->addTimingEvent(                                     \
                __FUNCTION__,                                               \
                queuedTime,                                                 \
                NULL,                                                       \
                0, NULL, NULL,                                              \
                ( pEvent )[0] );                                            \
            if( retainAppEvent )                                            \
            {                                                               \
                pIntercept->dispatch().clRetainEvent( ( pEvent )[0] );      \
            }                                                               \
            else                                                            \
            {                                                               \
                ( pEvent ) = NULL;                                          \
            }                                                               \
        }                                                                   \
    }

// Kernel flavor of DEVICE_PERFORMANCE_TIMING_END: records the event together
// with the kernel and its work dimension / global / local work sizes.  There
// is no unmap special case here.
#define DEVICE_PERFORMANCE_TIMING_END_KERNEL( pEvent, kernel, wd, gws, lws )\
    if( ( pIntercept->config().DevicePerformanceTiming ||                   \
          pIntercept->config().ITTPerformanceTiming ||                      \
          pIntercept->config().ChromePerformanceTiming ||                   \
          pIntercept->config().SIMDSurvey ||                                \
          !pIntercept->config().DevicePerfCounterCustom.empty() ) &&        \
        ( ( pEvent ) != NULL ) )                                            \
    {                                                                       \
        pIntercept->addTimingEvent(                                         \
            __FUNCTION__,                                                   \
            queuedTime,                                                     \
            kernel,                                                         \
            wd, gws, lws,                                                   \
            ( pEvent )[0] );                                                \
        if( retainAppEvent )                                                \
        {                                                                   \
            pIntercept->dispatch().clRetainEvent( ( pEvent )[0] );          \
        }                                                                   \
        else                                                                \
        {                                                                   \
            ( pEvent ) = NULL;                                              \
        }                                                                   \
    }

// Calls checkTimingEvents() when any device timing control is enabled.
#define DEVICE_PERFORMANCE_TIMING_CHECK()                                   \
    if( pIntercept->config().DevicePerformanceTiming ||                     \
        pIntercept->config().ITTPerformanceTiming ||                        \
        pIntercept->config().ChromePerformanceTiming ||                     \
        pIntercept->config().SIMDSurvey ||                                  \
        !pIntercept->config().DevicePerfCounterCustom.empty() )             \
    {                                                                       \
        pIntercept->checkTimingEvents();                                    \
    }

+/////////////////////////////////////////////////////////////////////////////// +// +#define SIMD_SURVEY_CREATE_PROGRAM_FROM_SOURCE( _program, _context, _count, _strings, _lengths ) \ + if( pIntercept->config().SIMDSurvey && \ + _program != NULL ) \ + { \ + pIntercept->SIMDSurveyCreateProgramFromSource( \ + _program, \ + _context, \ + _count, \ + _strings, \ + _lengths ); \ + } + +#define SIMD_SURVEY_BUILD_PROGRAM( _program, _numDevices, _deviceList, _options ) \ + if( pIntercept->config().SIMDSurvey && \ + _program != NULL ) \ + { \ + pIntercept->SIMDSurveyBuildProgram( \ + _program, \ + _numDevices, \ + _deviceList, \ + _options ); \ + } + +#define SIMD_SURVEY_CREATE_KERNEL( _program, _kernel, _name ) \ + if( pIntercept->config().SIMDSurvey && \ + _kernel != NULL ) \ + { \ + pIntercept->SIMDSurveyCreateKernel( \ + _program, \ + _kernel, \ + _name ); \ + } + +#define SIMD_SURVEY_SET_KERNEL_ARG( _kernel, _argIndex, _argSize, _argValue ) \ + if( pIntercept->config().SIMDSurvey ) \ + { \ + pIntercept->SIMDSurveySetKernelArg( \ + _kernel, \ + _argIndex, \ + _argSize, \ + _argValue ); \ + } + +#define SIMD_SURVEY_NDRANGE_KERNEL( _kernel ) \ + if( pIntercept->config().SIMDSurvey ) \ + { \ + pIntercept->SIMDSurveyNDRangeKernel( _kernel ); \ + } + +/////////////////////////////////////////////////////////////////////////////// +// +inline void CLIntercept::saveProgramNumber( const cl_program program ) +{ + m_OS.EnterCriticalSection(); + + m_ProgramNumberMap[ program ] = m_ProgramNumber; + m_ProgramNumber++; + + m_OS.LeaveCriticalSection(); +} + +inline unsigned int CLIntercept::getProgramNumber() const +{ + return m_ProgramNumber; +} + +/////////////////////////////////////////////////////////////////////////////// +// +inline cl_device_type CLIntercept::filterDeviceType( cl_device_type device_type ) const +{ + if( m_Config.DeviceType & device_type ) + { + device_type = CL_DEVICE_TYPE_ALL; + } + device_type &= (cl_device_type)m_Config.DeviceTypeFilter; + return 
device_type; +} + +/////////////////////////////////////////////////////////////////////////////// +// +#if defined(USE_ITT) + +inline __itt_domain* CLIntercept::ittDomain() const +{ + return m_ITTDomain; +} + +#endif + +/////////////////////////////////////////////////////////////////////////////// +// +inline void CLIntercept::logCL_GLTextureDetails( cl_mem image, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture ) +{ + CLIntercept* pIntercept = this; + + cl_image_format cl_format = { 0 }; + size_t cl_elementSize = 0; + size_t cl_rowPitch = 0; + size_t cl_slicePitch = 0; + size_t cl_width = 0; + size_t cl_height = 0; + size_t cl_depth = 0; + + cl_int subErrorCode = CL_SUCCESS; + + if( subErrorCode == CL_SUCCESS ) + { + subErrorCode = dispatch().clGetImageInfo( + image, + CL_IMAGE_FORMAT, + sizeof(cl_format), + &cl_format, + NULL); + } + + if( subErrorCode == CL_SUCCESS ) + { + subErrorCode = dispatch().clGetImageInfo( + image, + CL_IMAGE_ELEMENT_SIZE, + sizeof(cl_elementSize), + &cl_elementSize, + NULL); + } + + if( subErrorCode == CL_SUCCESS ) + { + subErrorCode = dispatch().clGetImageInfo( + image, + CL_IMAGE_ROW_PITCH, + sizeof(cl_rowPitch), + &cl_rowPitch, + NULL); + } + + if( subErrorCode == CL_SUCCESS ) + { + subErrorCode = dispatch().clGetImageInfo( + image, + CL_IMAGE_SLICE_PITCH, + sizeof(cl_slicePitch), + &cl_slicePitch, + NULL); + } + + if( subErrorCode == CL_SUCCESS ) + { + subErrorCode = dispatch().clGetImageInfo( + image, + CL_IMAGE_WIDTH, + sizeof(cl_width), + &cl_width, + NULL); + } + + if( subErrorCode == CL_SUCCESS ) + { + subErrorCode = dispatch().clGetImageInfo( + image, + CL_IMAGE_HEIGHT, + sizeof(cl_height), + &cl_height, + NULL); + } + + if( subErrorCode == CL_SUCCESS ) + { + subErrorCode = dispatch().clGetImageInfo( + image, + CL_IMAGE_DEPTH, + sizeof(cl_depth), + &cl_depth, + NULL); + } + + CALL_LOGGING_INFO( + "CL Channel Order = %s, " + "CL Channel Data Type = %s, " + "CL Row Pitch = %d, " + "CL Slice Pitch = %d, " + "CL Width = 
%d, " + "CL Height = %d, " + "CL Depth = %d, ", + enumName().name( cl_format.image_channel_order ).c_str(), + enumName().name( cl_format.image_channel_data_type ).c_str(), + cl_rowPitch, + cl_slicePitch, + cl_width, + cl_height, + cl_depth ); + + // OpenGL.lib is not linked into CLIntercept - the OpenGL calls are performed by using GetProcAddress, + // which is only available on windows. +#ifdef _WIN32 + HMODULE glModule = GetModuleHandle( "Opengl32.dll" ); + + if( glModule != NULL ) + { + FARPROC ptrGetTexLevel = GetProcAddress( glModule, "glGetTexLevelParameteriv" ); + FARPROC ptrGetIntegerv = GetProcAddress( glModule, "glGetIntegerv" ); + FARPROC ptrBindTexture = GetProcAddress( glModule, "glBindTexture" ); + FARPROC ptrGetError = GetProcAddress( glModule, "glGetError" ); + + if( ( ptrGetTexLevel != NULL ) && + ( ptrGetIntegerv != NULL ) && + ( ptrBindTexture != NULL ) && + ( ptrGetError != NULL ) ) + { + PFNGLGETTEXLEVELPARAMETERIVPROC _glGetTexLevel = (PFNGLGETTEXLEVELPARAMETERIVPROC)ptrGetTexLevel; + PFNGLGETINTEGERVPROC _glGetInteger = (PFNGLGETINTEGERVPROC)ptrGetIntegerv; + PFNGLBINDTEXTUREPROC _glBindTexture = (PFNGLBINDTEXTUREPROC)ptrBindTexture; + PFNGLGETERRORPROC _glGetError = (PFNGLGETERRORPROC)ptrGetError; + + GLenum gl_error = _glGetError(); + + if( gl_error == GL_FALSE ) + { + GLint restoreTextureId = 0; + + // Save the currently bound texture - we need to to rebind a different + // texture to query it. 
+ switch( target ) + { + case GL_TEXTURE_1D: + _glGetInteger( GL_TEXTURE_BINDING_1D, &restoreTextureId ); + break; + case GL_TEXTURE_1D_ARRAY: + _glGetInteger( GL_TEXTURE_BINDING_1D_ARRAY, &restoreTextureId ); + break; + case GL_TEXTURE_2D: + _glGetInteger( GL_TEXTURE_BINDING_2D, &restoreTextureId ); + break; + case GL_TEXTURE_2D_ARRAY: + _glGetInteger( GL_TEXTURE_BINDING_2D_ARRAY, &restoreTextureId ); + break; + case GL_TEXTURE_3D: + _glGetInteger( GL_TEXTURE_BINDING_3D, &restoreTextureId ); + break; + case GL_TEXTURE_CUBE_MAP: + _glGetInteger( GL_TEXTURE_BINDING_CUBE_MAP, &restoreTextureId ); + break; + case GL_TEXTURE_BUFFER: + _glGetInteger( GL_TEXTURE_BINDING_BUFFER, &restoreTextureId ); + break; + default: + // unexpected texture type + gl_error = GL_TRUE; + break; + } + + if( gl_error == GL_FALSE ) + { + GLint gl_width = 0; + GLint gl_height = 0; + GLint gl_depth = 0; + GLint gl_internal_format = 0; + GLint gl_buffer_size = 0; + GLint gl_buffer_offset = 0; + GLint gl_compressed_texture = GL_FALSE; + + // Bind the texture we want to query + _glBindTexture( target, texture ); + gl_error = _glGetError(); + + + if( gl_error == GL_FALSE ) + { + _glGetTexLevel( + target, + miplevel > 0 ? miplevel : 0, + GL_TEXTURE_INTERNAL_FORMAT, + &gl_internal_format ); + gl_error = _glGetError(); + } + + if( gl_error == GL_FALSE ) + { + _glGetTexLevel( + target, + miplevel > 0 ? miplevel : 0, + GL_TEXTURE_WIDTH, + &gl_width ); + gl_error = _glGetError(); + } + + if( gl_error == GL_FALSE ) + { + _glGetTexLevel( + target, + miplevel > 0 ? miplevel : 0, + GL_TEXTURE_HEIGHT, + &gl_height ); + gl_error = _glGetError(); + } + + if( gl_error == GL_FALSE ) + { + _glGetTexLevel( + target, + miplevel > 0 ? miplevel : 0, + GL_TEXTURE_DEPTH, + &gl_depth ); + gl_error = _glGetError(); + } + + if( gl_error == GL_FALSE ) + { + _glGetTexLevel( + target, + miplevel > 0 ? 
miplevel : 0, + GL_TEXTURE_BUFFER_SIZE, + &gl_buffer_size ); + gl_error = _glGetError(); + } + + if( gl_error == GL_FALSE ) + { + _glGetTexLevel( + target, + miplevel > 0 ? miplevel : 0, + GL_TEXTURE_BUFFER_OFFSET, + &gl_buffer_offset ); + gl_error = _glGetError(); + } + + // restore original bound texture + _glBindTexture( target, restoreTextureId ); + gl_error = _glGetError(); + + CALL_LOGGING_INFO( + "GL Internal Format = %s (%d), " + "GL Width = %d, " + "GL Height = %d, " + "GL Depth = %d, " + "GL Buffer Size = %d, " + "GL Buffer Offset = %d ", + enumName().name_gl( gl_internal_format ).c_str(), + gl_internal_format, + gl_width, + gl_height, + gl_depth, + gl_buffer_size, + gl_buffer_offset ); + } + } + } + } +#endif +} + +/////////////////////////////////////////////////////////////////////////////// +// +extern CLIntercept* g_pIntercept; + +inline CLIntercept* GetIntercept() +{ + return g_pIntercept; +} diff --git a/Src/main.cpp b/Src/main.cpp new file mode 100644 index 00000000..c36a620b --- /dev/null +++ b/Src/main.cpp @@ -0,0 +1,86 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#include "common.h" +#include "intercept.h" +#ifdef __ANDROID__ +#include +#endif + +CLIntercept* g_pIntercept = NULL; + +#if defined(_WIN32) + +#include + +BOOL APIENTRY DllMain(HINSTANCE hInstance, DWORD dwReasonForCall, LPVOID lpReserved) +{ + switch(dwReasonForCall) { + case DLL_PROCESS_ATTACH: + if( CLIntercept::Create( hInstance, g_pIntercept ) == false ) + { + return FALSE; + } + break; + + case DLL_PROCESS_DETACH: + CLIntercept::Delete( g_pIntercept ); + break; + + case DLL_THREAD_ATTACH: + break; + + case DLL_THREAD_DETACH: + break; + + default: + CLI_ASSERT(0); + break; + } + + return TRUE; +} + +#elif defined(__linux__) || defined(__APPLE__) + +void __attribute__((constructor)) CLIntercept_Load(void); +void __attribute__((destructor)) CLIntercept_Unload(void); + +void CLIntercept_Load(void) +{ +#ifdef __ANDROID__ + __android_log_print( ANDROID_LOG_INFO, "clIntercept", ">>Load.pid=%d\n", getpid() ); +#endif + CLIntercept::Create( NULL, g_pIntercept ); +#ifdef __ANDROID__ + __android_log_print( ANDROID_LOG_INFO, "clIntercept", "< [ ...]) +# +# Returns the refspec and sha hash of the current head revision +# +# git_describe( [ ...]) +# +# Returns the results of git describe on the source tree, and adjusting +# the output so that it tests false if an error occurs. +# +# git_get_exact_tag( [ ...]) +# +# Returns the results of git describe --exact-match on the source tree, +# and adjusting the output so that it tests false if there was no exact +# matching tag. 
+# +# Requires CMake 2.6 or newer (uses the 'function' command) +# +# Original Author: +# 2009-2010 Ryan Pavlik +# http://academic.cleardefinition.com +# Iowa State University HCI Graduate Program/VRAC +# +# Copyright Iowa State University 2009-2010. +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) + +if(__get_git_revision_description) + return() +endif() +set(__get_git_revision_description YES) + +# We must run the following at "include" time, not at function call time, +# to find the path to this module rather than the path to a calling list file +get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH) + +function(get_git_head_revision _refspecvar _hashvar) + set(GIT_PARENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + set(GIT_DIR "${GIT_PARENT_DIR}/.git") + while(NOT EXISTS "${GIT_DIR}") # .git dir not found, search parent directories + set(GIT_PREVIOUS_PARENT "${GIT_PARENT_DIR}") + get_filename_component(GIT_PARENT_DIR ${GIT_PARENT_DIR} PATH) + if(GIT_PARENT_DIR STREQUAL GIT_PREVIOUS_PARENT) + # We have reached the root directory, we are not in git + set(${_refspecvar} "GITDIR-NOTFOUND" PARENT_SCOPE) + set(${_hashvar} "GITDIR-NOTFOUND" PARENT_SCOPE) + return() + endif() + set(GIT_DIR "${GIT_PARENT_DIR}/.git") + endwhile() + # check if this is a submodule + if(NOT IS_DIRECTORY ${GIT_DIR}) + file(READ ${GIT_DIR} submodule) + string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" GIT_DIR_RELATIVE ${submodule}) + get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH) + get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE} ABSOLUTE) + endif() + set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data") + if(NOT EXISTS "${GIT_DATA}") + file(MAKE_DIRECTORY "${GIT_DATA}") + endif() + + if(NOT EXISTS "${GIT_DIR}/HEAD") + return() + endif() + set(HEAD_FILE "${GIT_DATA}/HEAD") + configure_file("${GIT_DIR}/HEAD" "${HEAD_FILE}" COPYONLY) + + 
configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in" + "${GIT_DATA}/grabRef.cmake" + @ONLY) + include("${GIT_DATA}/grabRef.cmake") + + set(${_refspecvar} "${HEAD_REF}" PARENT_SCOPE) + set(${_hashvar} "${HEAD_HASH}" PARENT_SCOPE) +endfunction() + +function(git_describe _var) + if(NOT GIT_FOUND) + find_package(Git QUIET) + endif() + get_git_head_revision(refspec hash) + if(NOT GIT_FOUND) + set(${_var} "GIT-NOTFOUND" PARENT_SCOPE) + return() + endif() + if(NOT hash) + set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE) + return() + endif() + + # TODO sanitize + #if((${ARGN}" MATCHES "&&") OR + # (ARGN MATCHES "||") OR + # (ARGN MATCHES "\\;")) + # message("Please report the following error to the project!") + # message(FATAL_ERROR "Looks like someone's doing something nefarious with git_describe! Passed arguments ${ARGN}") + #endif() + + #message(STATUS "Arguments to execute_process: ${ARGN}") + + execute_process(COMMAND + "${GIT_EXECUTABLE}" + describe + ${hash} + ${ARGN} + WORKING_DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}" + RESULT_VARIABLE + res + OUTPUT_VARIABLE + out + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT res EQUAL 0) + set(out "${out}-${res}-NOTFOUND") + endif() + + set(${_var} "${out}" PARENT_SCOPE) +endfunction() + +function(git_get_exact_tag _var) + git_describe(out --exact-match ${ARGN}) + set(${_var} "${out}" PARENT_SCOPE) +endfunction() diff --git a/cmake_modules/GetGitRevisionDescription.cmake.in b/cmake_modules/GetGitRevisionDescription.cmake.in new file mode 100644 index 00000000..6d8b708e --- /dev/null +++ b/cmake_modules/GetGitRevisionDescription.cmake.in @@ -0,0 +1,41 @@ +# +# Internal file for GetGitRevisionDescription.cmake +# +# Requires CMake 2.6 or newer (uses the 'function' command) +# +# Original Author: +# 2009-2010 Ryan Pavlik +# http://academic.cleardefinition.com +# Iowa State University HCI Graduate Program/VRAC +# +# Copyright Iowa State University 2009-2010. 
+# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) + +set(HEAD_HASH) + +file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024) + +string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS) +if(HEAD_CONTENTS MATCHES "ref") + # named branch + string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}") + if(EXISTS "@GIT_DIR@/${HEAD_REF}") + configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY) + else() + configure_file("@GIT_DIR@/packed-refs" "@GIT_DATA@/packed-refs" COPYONLY) + file(READ "@GIT_DATA@/packed-refs" PACKED_REFS) + if(${PACKED_REFS} MATCHES "([0-9a-z]*) ${HEAD_REF}") + set(HEAD_HASH "${CMAKE_MATCH_1}") + endif() + endif() +else() + # detached HEAD + configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY) +endif() + +if(NOT HEAD_HASH) + file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024) + string(STRIP "${HEAD_HASH}" HEAD_HASH) +endif() diff --git a/config/CLIConfig.cpp b/config/CLIConfig.cpp new file mode 100644 index 00000000..aad5091d --- /dev/null +++ b/config/CLIConfig.cpp @@ -0,0 +1,1116 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +*/ + +#include "CLIConfig.h" + +#include + +#include + +std::wstring ToWString( const std::string& str ) +{ + int size_needed = MultiByteToWideChar( CP_UTF8, 0, &str[0], (int)str.size(), NULL, 0 ); + std::wstring wstrTo( size_needed, 0 ); + MultiByteToWideChar( CP_UTF8, 0, &str[0], (int)str.size(), &wstrTo[0], size_needed ); + return wstrTo; +} +std::wstring ToWString( const std::vector& v ) +{ + int size_needed = MultiByteToWideChar( CP_UTF8, 0, &v[0], (int)v.size(), NULL, 0 ); + std::wstring wstrTo( size_needed, 0 ); + MultiByteToWideChar( CP_UTF8, 0, &v[0], (int)v.size(), &wstrTo[0], size_needed ); + return wstrTo; +} +std::string ToString( const std::wstring &wstr) +{ + int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL); + std::string strTo( size_needed, 0 ); + WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL); + return strTo; +} + +CVariableState::CVariableState() +{ + CurrentIconState.resize(cNumVars, ICON_STATE_DEFAULT); + CurrentIntValue.resize(cNumVars); + CurrentStringValue.resize(cNumVars); + + SetDefaultStates(); +} + +// Initializes Variable State and Icon State to defaults +void CVariableState::SetDefaultStates() +{ + for (int i = 0; i < cNumVars; i++) + { + CurrentIntValue[i] = cVars[i].defIntValue; + CurrentStringValue[i] = ToWString(cVars[i].defStrValue); + CurrentIconState[i] = + cVars[i].Type == CONTROL_TYPE_SEPARATOR ? 
+ ICON_STATE_SEPARATOR : + ICON_STATE_DEFAULT; + } +} + +// Updates all Icon States from Variable States +void CVariableState::UpdateIconStates() +{ + for (int i = 0; i < cNumVars; i++) + { + switch (cVars[i].Type) + { + case CONTROL_TYPE_BOOL: + case CONTROL_TYPE_INT: + if (CurrentIntValue[i] == cVars[i].defIntValue) + { + CurrentIconState[i] = ICON_STATE_DEFAULT; + } + else + { + CurrentIconState[i] = ICON_STATE_NONDEFAULT; + } + break; + case CONTROL_TYPE_STRING: + if (CurrentStringValue[i] == ToWString(cVars[i].defStrValue)) + { + CurrentIconState[i] = ICON_STATE_DEFAULT; + } + else + { + CurrentIconState[i] = ICON_STATE_NONDEFAULT; + } + break; + case CONTROL_TYPE_SEPARATOR: + CurrentIconState[i] = ICON_STATE_SEPARATOR; + break; + } + } +} + + +CControlsPage::CControlsPage( CVariableState* pVariableState ) : + CPropertyPage(IDD_VARS_PAGE, IDS_USER), + m_pVariableState( pVariableState ) +{ +} + +BOOL CControlsPage::OnInitDialog() +{ + CPropertyPage::OnInitDialog(); + + HICON hIcon[NUM_ICON_STATES]; + + m_ImageList.Create(16, 16, 0, NUM_ICON_STATES, 0); + hIcon[ICON_STATE_NONDEFAULT] = AfxGetApp()->LoadIcon(IDI_STATE_NONDEFAULT); + hIcon[ICON_STATE_MODIFIED_NONDEFAULT] = AfxGetApp()->LoadIcon(IDI_STATE_MODIFIED_NONDEFAULT); + hIcon[ICON_STATE_MODIFIED_DEFAULT] = AfxGetApp()->LoadIcon(IDI_STATE_MODIFIED_DEFAULT); + hIcon[ICON_STATE_DEFAULT] = AfxGetApp()->LoadIcon(IDI_STATE_DEFAULT); + hIcon[ICON_STATE_SEPARATOR] = AfxGetApp()->LoadIcon(IDI_STATE_SEPARATOR); + + for( int i = 0; i < NUM_ICON_STATES; i++ ) + { + m_ImageList.Add(hIcon[i]); + } + + m_pListCtrl = (CListCtrl*)GetDlgItem(IDC_VAR_LIST); + m_pListCtrl->SetImageList(&m_ImageList, LVSIL_SMALL); + + m_pListCtrl->InsertColumn(0, L"Header"); + for( int i = 0; i < cNumVars; i++ ) + { + m_pListCtrl->InsertItem( + i, + ToWString( cVars[i].Name ).c_str(), + m_pVariableState->CurrentIconState[i] ); + } + m_pListCtrl->SetColumnWidth(0, LVSCW_AUTOSIZE); + + m_SelectedItem = 0; + m_pListCtrl->SetFocus(); + 
m_pListCtrl->SetItemState(m_SelectedItem, LVIS_SELECTED, LVIS_SELECTED); + + // Read previous Variable State from the registry + ReadSettingsFromRegistry(); + + // Sync Icon State with Variable State + m_pVariableState->UpdateIconStates(); + UpdateIcons(); + + SetModified(FALSE); + + return TRUE; +} + +// Updates all Icons based on Icon State +void CControlsPage::UpdateIcons() const +{ + for( int i = 0; i < cNumVars; i++ ) + { + UpdateIcon(i); + } +} + +// Updates one Icon based on Icon State +void CControlsPage::UpdateIcon( int vIndex ) const +{ + m_pListCtrl->SetItem( + vIndex, + 0, + LVIF_TEXT | LVIF_IMAGE, + ToWString( cVars[vIndex].Name ).c_str(), + m_pVariableState->CurrentIconState[vIndex], + 0, + 0, + 0 ); +} + +void CControlsPage::ResetDefaults() +{ + m_pVariableState->SetDefaultStates(); + UpdateIcons(); + + WriteSettingsToRegistry(); + + UpdateControl(m_SelectedItem); + + SetModified(FALSE); +} + +// Updates Variable State from the Registry +void CControlsPage::ReadSettingsFromRegistry() +{ + HKEY key; + + LSTATUS success = RegOpenKeyEx( + HKEY_CURRENT_USER, + REGISTRY_KEY, + 0, + KEY_READ, + &key ); + if( success == ERROR_SUCCESS ) + { + for( int i = 0; i < cNumVars; i++ ) + { + std::vector regQuery(128); + + unsigned long regQuerySize = regQuery.size() * sizeof(regQuery[0]); + + success = RegQueryValueEx( + key, + ToWString(cVars[i].Name).c_str(), + NULL, + NULL, + (LPBYTE)regQuery.data(), + ®QuerySize ); + + if( success == ERROR_SUCCESS ) + { + switch (cVars[i].Type) + { + case CONTROL_TYPE_BOOL: + { + int* pInt = (int*)regQuery.data(); + m_pVariableState->CurrentIntValue[i] = pInt[0] ? 
TRUE : FALSE; + } + break; + case CONTROL_TYPE_INT: + { + int* pInt = (int*)regQuery.data(); + m_pVariableState->CurrentIntValue[i] = pInt[0]; + } + break; + case CONTROL_TYPE_STRING: + { + m_pVariableState->CurrentStringValue[i].assign( regQuery.begin(), regQuery.end() ); + } + break; + case CONTROL_TYPE_SEPARATOR: + break; + } + + m_pVariableState->CurrentIconState[i] = ICON_STATE_NONDEFAULT; + } + + } + + RegCloseKey(key); + } +} + +// Writes Variable State to the Registry +void CControlsPage::WriteSettingsToRegistry() const +{ + HKEY key; + + // 32-bit registry keys. + LSTATUS success = RegCreateKeyEx( + HKEY_CURRENT_USER, + REGISTRY_KEY, + 0, + NULL, + REG_OPTION_NON_VOLATILE, + KEY_SET_VALUE, + NULL, + &key, + NULL ); + if( success == ERROR_SUCCESS ) + { + WriteSettingsToRegistryHelper(key); + RegCloseKey(key); + } + + // 64-bit registry keys. This probably isn't needed, but better + // safe than sorry. + success = RegCreateKeyEx( + HKEY_CURRENT_USER, + REGISTRY_KEY, + 0, + NULL, + REG_OPTION_NON_VOLATILE, + KEY_WOW64_64KEY | KEY_SET_VALUE, + NULL, + &key, + NULL ); + if( success == ERROR_SUCCESS ) + { + WriteSettingsToRegistryHelper(key); + RegCloseKey(key); + } +} + +void CControlsPage::WriteSettingsToRegistryHelper( HKEY key ) const +{ + for (int i = 0; i < cNumVars; i++) + { + switch (cVars[i].Type) { + case CONTROL_TYPE_BOOL: + case CONTROL_TYPE_INT: + if( m_pVariableState->CurrentIntValue[i] != cVars[i].defIntValue ) + { + DWORD dwValue = (DWORD)m_pVariableState->CurrentIntValue[i]; + + RegSetValueEx( + key, + ToWString(cVars[i].Name).c_str(), + 0, + REG_DWORD, + (CONST BYTE *)&dwValue, + sizeof(DWORD)); + } + else + { + RegDeleteValue( + key, + ToWString(cVars[i].Name).c_str() ); + } + break; + case CONTROL_TYPE_STRING: + if( m_pVariableState->CurrentStringValue[i] != ToWString(cVars[i].defStrValue) ) + { + RegSetValueEx( + key, + ToWString(cVars[i].Name).c_str(), + 0, + REG_SZ, + (const BYTE*)m_pVariableState->CurrentStringValue[i].data(), + 
m_pVariableState->CurrentStringValue[i].length() * sizeof(wchar_t) ); + } + else + { + RegDeleteValue( + key, + ToWString(cVars[i].Name).c_str() ); + } + break; + case CONTROL_TYPE_SEPARATOR: + break; + } + } +} + +// Updates a Control Based on Current State +void CControlsPage::UpdateControl( int vIndex ) +{ + CButton *cCurEnabled, *cDefEnabled; + CEdit *cCurEdit, *cDefEdit; + + cCurEnabled = (CButton *)GetDlgItem(IDC_CHECK_CUR_ENABLED); + cDefEnabled = (CButton *)GetDlgItem(IDC_CHECK_DEF_ENABLED); + cCurEdit = (CEdit *)GetDlgItem(IDC_CUR_EDIT); + cDefEdit = (CEdit *)GetDlgItem(IDC_DEF_EDIT); + + switch (cVars[vIndex].Type) + { + case CONTROL_TYPE_BOOL: + cCurEnabled->ShowWindow(SW_SHOW); + cDefEnabled->ShowWindow(SW_SHOW); + cCurEdit->ShowWindow(SW_HIDE); + cDefEdit->ShowWindow(SW_HIDE); + + if (m_pVariableState->CurrentIntValue[vIndex]) + { + cCurEnabled->SetCheck(TRUE); + } + else + { + cCurEnabled->SetCheck(FALSE); + } + + if (cVars[vIndex].defIntValue) + { + cDefEnabled->SetCheck(TRUE); + } + else + { + cDefEnabled->SetCheck(FALSE); + } + break; + + // For int and String variables, setting the + // current and default value causes the ON_EN_CHANGE + // method to get called spuriously setting the varsChanged flag + // to workaround this, we first hide the edit control + // set the text to the required value, and then show the control + // The varChange method checks for visibility before doing anything + case CONTROL_TYPE_INT: + cCurEdit->ShowWindow(SW_HIDE); + SetDlgItemInt(IDC_CUR_EDIT, m_pVariableState->CurrentIntValue[vIndex]); + cCurEdit->ShowWindow(SW_SHOW); + cDefEdit->ShowWindow(SW_SHOW); + SetDlgItemInt(IDC_DEF_EDIT, cVars[vIndex].defIntValue); + cCurEnabled->ShowWindow(SW_HIDE); + cDefEnabled->ShowWindow(SW_HIDE); + break; + case CONTROL_TYPE_STRING: + cCurEdit->ShowWindow(SW_HIDE); + SetDlgItemText(IDC_CUR_EDIT, m_pVariableState->CurrentStringValue[vIndex].c_str()); + cCurEdit->ShowWindow(SW_SHOW); + cDefEdit->ShowWindow(SW_SHOW); + 
SetDlgItemText(IDC_DEF_EDIT, ToWString(cVars[vIndex].defStrValue).c_str()); + cCurEnabled->ShowWindow(SW_HIDE); + cDefEnabled->ShowWindow(SW_HIDE); + break; + case CONTROL_TYPE_SEPARATOR: + cCurEnabled->ShowWindow(SW_HIDE); + cDefEnabled->ShowWindow(SW_HIDE); + cCurEdit->ShowWindow(SW_HIDE); + cDefEdit->ShowWindow(SW_HIDE); + break; + } + + // Set the help text for the variable + SetDlgItemText(IDC_EDIT_HELP, ToWString(cVars[vIndex].HelpText).c_str()); +} + +// Updates a Variable State and Icon State based on a Control +void CControlsPage::UpdateVarState(int vIndex) +{ + CButton *cCurEnabled = (CButton*)GetDlgItem(IDC_CHECK_CUR_ENABLED); + + switch (cVars[vIndex].Type) + { + case CONTROL_TYPE_BOOL: + if (cCurEnabled->GetCheck() == TRUE) + { + m_pVariableState->CurrentIntValue[vIndex] = TRUE; + } + else + { + m_pVariableState->CurrentIntValue[vIndex] = FALSE; + } + break; + + case CONTROL_TYPE_INT: + m_pVariableState->CurrentIntValue[vIndex] = GetDlgItemInt(IDC_CUR_EDIT); + break; + + case CONTROL_TYPE_STRING: + { + int length = GetDlgItem( IDC_CUR_EDIT )->GetWindowTextLength(); + + m_pVariableState->CurrentStringValue[vIndex].resize( length ); + + GetDlgItemText( + IDC_CUR_EDIT, + &m_pVariableState->CurrentStringValue[vIndex][0], + m_pVariableState->CurrentStringValue[vIndex].size() * sizeof(wchar_t) ); + } + break; + + case CONTROL_TYPE_SEPARATOR: + break; + } + + switch (cVars[vIndex].Type) + { + case CONTROL_TYPE_BOOL: + case CONTROL_TYPE_INT: + if (m_pVariableState->CurrentIntValue[vIndex] == cVars[vIndex].defIntValue) + { + if (m_pVariableState->CurrentIconState[vIndex] == ICON_STATE_MODIFIED_NONDEFAULT) + { + m_pVariableState->CurrentIconState[vIndex] = ICON_STATE_DEFAULT; + } + else if (m_pVariableState->CurrentIconState[vIndex] == ICON_STATE_NONDEFAULT) + { + m_pVariableState->CurrentIconState[vIndex] = ICON_STATE_MODIFIED_DEFAULT; + } + } + else + { + if (m_pVariableState->CurrentIconState[vIndex] == ICON_STATE_DEFAULT) + { + 
m_pVariableState->CurrentIconState[vIndex] = ICON_STATE_MODIFIED_NONDEFAULT; + } + else if (m_pVariableState->CurrentIconState[vIndex] == ICON_STATE_MODIFIED_DEFAULT) + { + m_pVariableState->CurrentIconState[vIndex] = ICON_STATE_NONDEFAULT; + } + } + break; + case CONTROL_TYPE_STRING: + if (m_pVariableState->CurrentStringValue[vIndex] == ToWString(cVars[vIndex].defStrValue)) + { + if (m_pVariableState->CurrentIconState[vIndex] == ICON_STATE_MODIFIED_NONDEFAULT) + { + m_pVariableState->CurrentIconState[vIndex] = ICON_STATE_DEFAULT; + } + else if (m_pVariableState->CurrentIconState[vIndex] == ICON_STATE_NONDEFAULT) + { + m_pVariableState->CurrentIconState[vIndex] = ICON_STATE_MODIFIED_DEFAULT; + } + } + else + { + if (m_pVariableState->CurrentIconState[vIndex] == ICON_STATE_DEFAULT) + { + m_pVariableState->CurrentIconState[vIndex] = ICON_STATE_MODIFIED_NONDEFAULT; + } + else if (m_pVariableState->CurrentIconState[vIndex] == ICON_STATE_MODIFIED_DEFAULT) + { + m_pVariableState->CurrentIconState[vIndex] = ICON_STATE_NONDEFAULT; + } + } + break; + + case CONTROL_TYPE_SEPARATOR: + m_pVariableState->CurrentIconState[vIndex] = ICON_STATE_SEPARATOR; + break; + } +} + +// Handle the Apply or OK Buttons +void CControlsPage::ApplyChanges() +{ + WriteSettingsToRegistry(); + + m_pVariableState->UpdateIconStates(); + UpdateIcons(); + + SetModified(FALSE); +} + +// Called when a boolean Control changes +void CControlsPage::OnVarChangeBool() +{ + CButton *cCurEnabled = (CButton *)GetDlgItem(IDC_CHECK_CUR_ENABLED); + if (cCurEnabled->IsWindowVisible() == FALSE) { + // If the window is not visible, do nothing + return; + } + + UpdateVarState(m_SelectedItem); + UpdateIcon(m_SelectedItem); + + SetModified(TRUE); +} + +// Called when a int/str Control changes +void CControlsPage::OnVarChangeEdit() +{ + CEdit *cCurEdit = (CEdit *)GetDlgItem(IDC_CUR_EDIT); + if (cCurEdit->IsWindowVisible() == FALSE) { + // If the window is not visible, do nothing + return; + } + + 
UpdateVarState(m_SelectedItem); + UpdateIcon(m_SelectedItem); + SetModified(TRUE); +} + +// Called when the user selects a different item in the list +void CControlsPage::OnSelectionChange() +{ + POSITION p = m_pListCtrl->GetFirstSelectedItemPosition(); + int vIndex; + + if (p == NULL) { + return; + } else { + vIndex = m_pListCtrl->GetNextSelectedItem(p); + } + + UpdateControl(vIndex); + m_SelectedItem = vIndex; +} + +// handles changes in selection of a listCtrl Item +void CControlsPage::OnNotify( NMHDR * pNMHDR, LRESULT * pResult ) +{ + NM_LISTVIEW* pNMListView = (NM_LISTVIEW*)pNMHDR; + + // call the OnSelectionChange function + // only if the state has LVIS_SELECTED + + if (pNMListView->uNewState & LVIS_SELECTED) { + OnSelectionChange(); + } + + *pResult = 0; +} + +BEGIN_MESSAGE_MAP(CControlsPage, CPropertyPage) + ON_BN_CLICKED(IDC_CMD_DEFAULTS, ResetDefaults) + ON_BN_CLICKED(IDC_CHECK_CUR_ENABLED, OnVarChangeBool) + ON_EN_CHANGE(IDC_CUR_EDIT, OnVarChangeEdit) + ON_NOTIFY(LVN_ITEMCHANGED, IDC_VAR_LIST, OnNotify) +END_MESSAGE_MAP() + +/////////////////////////////////////////////////////////////////////////////// + +// ========================================================================== +// About page + +typedef cl_int (CL_API_CALL *pfnGetPlatformIDs) ( + cl_uint num_entries, + cl_platform_id* platforms, + cl_uint* num_platforms ); +typedef cl_int (CL_API_CALL *pfnGetPlatformInfo) ( + cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); +typedef cl_int (CL_API_CALL *pfnGetDeviceIDs) ( + cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices ); +typedef cl_int (CL_API_CALL *pfnGetDeviceInfo) ( + cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret ); + +CAboutPage::CAboutPage( CVariableState* pVariableState ) : + 
CPropertyPage(IDD_ABOUT_PAGE, IDS_INFO), + m_pVariableState( pVariableState ) +{ +} + +BOOL CAboutPage::OnInitDialog() +{ + CPropertyPage::OnInitDialog(); + + CComboBox* pPlatformComboBox = (CComboBox*)GetDlgItem(IDC_PLATFORM_LIST); + + HMODULE hModule = ::LoadLibraryA( "opencl.dll" ); + if( hModule ) + { + pfnGetPlatformIDs dclGetPlatformIDs = (pfnGetPlatformIDs)::GetProcAddress( hModule, "clGetPlatformIDs" ); + pfnGetPlatformInfo dclGetPlatformInfo = (pfnGetPlatformInfo)::GetProcAddress( hModule, "clGetPlatformInfo" ); + + cl_int errorCode = CL_SUCCESS; + cl_uint numPlatforms = 0; + + if( errorCode == CL_SUCCESS && + dclGetPlatformIDs ) + { + errorCode = dclGetPlatformIDs( + 0, + NULL, + &numPlatforms ); + } + if( errorCode != CL_SUCCESS || + numPlatforms == 0 ) + { + pPlatformComboBox->AddString(L"No OpenCL platforms detected!"); + } + + m_Platforms.resize( numPlatforms ); + + if( errorCode == CL_SUCCESS && + dclGetPlatformIDs && + dclGetPlatformInfo && + numPlatforms != 0 ) + { + errorCode = dclGetPlatformIDs( + numPlatforms, + m_Platforms.data(), + NULL ); + + for( cl_uint p = 0; p < numPlatforms; p++ ) + { + size_t stringLength = 0; + if( errorCode == CL_SUCCESS ) + { + errorCode = dclGetPlatformInfo( + m_Platforms[p], + CL_PLATFORM_NAME, + 0, + NULL, + &stringLength ); + } + if( errorCode == CL_SUCCESS ) + { + std::vector str( stringLength ); + errorCode = dclGetPlatformInfo( + m_Platforms[p], + CL_PLATFORM_NAME, + str.size(), + str.data(), + NULL ); + if( errorCode == CL_SUCCESS ) + { + pPlatformComboBox->AddString( + ToWString(str).c_str() ); + } + } + } + } + + ::FreeLibrary( hModule ); + } + + pPlatformComboBox->SetCurSel(0); + + OnPlatformListChange(); + OnDeviceListChange(); + + return TRUE; +} + +BOOL CAboutPage::OnSetActive() +{ + CPropertyPage::OnSetActive(); + + CListBox* pConfigSummaryList = (CListBox*)GetDlgItem(IDC_CONFIG_SUMMARY); + + // First, delete any existing strings in the config summary list. 
+ for( int i = pConfigSummaryList->GetCount() - 1; i >= 0; i-- ) + { + pConfigSummaryList->DeleteString( i ); + } + + int n = 0; + for( int i = 0; i < cNumVars; i++ ) + { + if( m_pVariableState->CurrentIconState[i] == ICON_STATE_NONDEFAULT ) + { + std::ostringstream ss; + ss << cVars[i].Name << " = "; + switch( cVars[i].Type ) + { + case CONTROL_TYPE_BOOL: + ss << ( m_pVariableState->CurrentIntValue[i] ? + "true" : + "false" ); + break; + case CONTROL_TYPE_INT: + ss << m_pVariableState->CurrentIntValue[i]; + break; + case CONTROL_TYPE_STRING: + ss << ToString( m_pVariableState->CurrentStringValue[i] ); + break; + default: + ss << ""; + } + pConfigSummaryList->InsertString(n++, ToWString( ss.str() ).c_str() ); + } + } + if( n == 0 ) + { + pConfigSummaryList->InsertString(n++, L"No non-default controls." ); + } + + return TRUE; +} + +static cl_int GetDeviceInfoString( + pfnGetDeviceInfo dclGetDeviceInfo, + cl_device_id device, + cl_device_info param_name, + std::vector& param_value ) +{ + cl_int errorCode = CL_SUCCESS; + size_t size = 0; + + if( errorCode == CL_SUCCESS ) + { + errorCode = dclGetDeviceInfo( + device, + param_name, + 0, + NULL, + &size ); + } + + if( errorCode == CL_SUCCESS ) + { + param_value.resize( size ); + errorCode = dclGetDeviceInfo( + device, + param_name, + param_value.size(), + param_value.data(), + NULL ); + } + + return errorCode; +} + +void CAboutPage::OnPlatformListChange() +{ + CComboBox* pPlatformComboBox = (CComboBox*)GetDlgItem(IDC_PLATFORM_LIST); + CComboBox* pDeviceComboBox = (CComboBox*)GetDlgItem(IDC_DEVICE_LIST); + + // First, delete any existing strings in the device list. + for( int i = pDeviceComboBox->GetCount() - 1; i >= 0; i-- ) + { + pDeviceComboBox->DeleteString( i ); + } + + // Get the currently selected platform index. 
+ cl_uint platformIndex = pPlatformComboBox->GetCurSel(); + + HMODULE hModule = ::LoadLibraryA( "opencl.dll" ); + if( hModule ) + { + pfnGetDeviceIDs dclGetDeviceIDs = (pfnGetDeviceIDs)::GetProcAddress( hModule, "clGetDeviceIDs" ); + pfnGetDeviceInfo dclGetDeviceInfo = (pfnGetDeviceInfo)::GetProcAddress( hModule, "clGetDeviceInfo" ); + + // Get the array of platforms. + cl_int errorCode = CL_SUCCESS; + cl_uint numDevices = 0; + + if( errorCode == CL_SUCCESS && + dclGetDeviceIDs && + platformIndex < m_Platforms.size() ) + { + errorCode = dclGetDeviceIDs( + m_Platforms[platformIndex], + CL_DEVICE_TYPE_ALL, + 0, + NULL, + &numDevices ); + if( errorCode != CL_SUCCESS || + numDevices == 0 ) + { + pDeviceComboBox->AddString(L"No OpenCL devices detected!"); + } + } + else + { + pDeviceComboBox->AddString(L"No OpenCL platforms detected!"); + } + + m_Devices.resize( numDevices ); + + if( errorCode == CL_SUCCESS && + dclGetDeviceIDs && + dclGetDeviceInfo ) + { + errorCode = dclGetDeviceIDs( + m_Platforms[platformIndex], + CL_DEVICE_TYPE_ALL, + m_Devices.size(), + m_Devices.data(), + NULL ); + for( cl_uint d = 0; d < numDevices; d++ ) + { + std::vector str; + if( errorCode == CL_SUCCESS ) + { + errorCode = GetDeviceInfoString( + dclGetDeviceInfo, + m_Devices[d], + CL_DEVICE_NAME, + str ); + } + if( errorCode == CL_SUCCESS ) + { + pDeviceComboBox->AddString( + ToWString(str).c_str() ); + } + } + } + + ::FreeLibrary( hModule ); + } + + pDeviceComboBox->SetCurSel(0); + + OnDeviceListChange(); +} + +void CAboutPage::OnDeviceListChange() +{ + CComboBox* pDeviceComboBox = (CComboBox*)GetDlgItem(IDC_DEVICE_LIST); + CListBox* pDeviceInfoList = (CListBox*)GetDlgItem(IDC_DEVICE_INFO); + + // First, delete any existing strings in the device info list. + for( int i = pDeviceInfoList->GetCount() - 1; i >= 0; i-- ) + { + pDeviceInfoList->DeleteString( i ); + } + + // Get the currently selected device index. 
+ cl_uint deviceIndex = pDeviceComboBox->GetCurSel(); + if( deviceIndex < m_Devices.size() ) + { + HMODULE hModule = ::LoadLibraryA( "opencl.dll" ); + if( hModule ) + { + pfnGetDeviceInfo dclGetDeviceInfo = (pfnGetDeviceInfo)::GetProcAddress( hModule, "clGetDeviceInfo" ); + + cl_int errorCode = CL_SUCCESS; + + if( dclGetDeviceInfo ) + { + std::vector vendor; + errorCode |= GetDeviceInfoString( + dclGetDeviceInfo, + m_Devices[deviceIndex], + CL_DEVICE_VENDOR, + vendor ); + std::vector version; + errorCode |= GetDeviceInfoString( + dclGetDeviceInfo, + m_Devices[deviceIndex], + CL_DEVICE_VERSION, + version ); + std::vector driverVersion; + errorCode |= GetDeviceInfoString( + dclGetDeviceInfo, + m_Devices[deviceIndex], + CL_DRIVER_VERSION, + driverVersion ); + + if( errorCode == CL_SUCCESS ) + { + int n = 0; + pDeviceInfoList->InsertString(n++, ToWString(vendor).c_str() ); + pDeviceInfoList->InsertString(n++, ToWString(version).c_str() ); + pDeviceInfoList->InsertString(n++, ToWString(driverVersion).c_str() ); + } + else + { + pDeviceInfoList->InsertString(0, L"Error getting device info!"); + } + } + else + { + pDeviceInfoList->InsertString(0, L"Error getting device info function pointer!"); + } + + ::FreeLibrary( hModule ); + } + } +} + +BEGIN_MESSAGE_MAP(CAboutPage, CPropertyPage) + ON_CBN_SELCHANGE(IDC_PLATFORM_LIST, OnPlatformListChange) + ON_CBN_SELCHANGE(IDC_DEVICE_LIST, OnDeviceListChange) +END_MESSAGE_MAP() + +// ========================================================================== +// General utility routines + +// This is the message handler for the Apply Button +// Calls the Apply Method for each enabled page + +void CCLInterceptConfigSheet::OnApplyNow() +{ + m_UserPage.ApplyChanges(); + m_UserPage.SetModified(FALSE); + + PostMessage(WM_RESIZEPAGE); +} + +void CCLInterceptConfigSheet::OnOK() +{ + // call the ApplyNow method since the user clicked OK + OnApplyNow(); + + // Signal the end of the dialog + CPropertySheet::EndDialog(IDOK); +} + +BOOL 
CCLInterceptConfigSheet::OnInitDialog( ) +{ + CPropertySheet::OnInitDialog(); + + ModifyStyle(0, WS_THICKFRAME); + + RECT r; + GetWindowRect(&r); + m_baseSize.x = r.right - r.left; + m_baseSize.y = r.bottom - r.top; + + return TRUE; +} + +void CCLInterceptConfigSheet::OnSize(UINT nType, int cx, int cy) +{ + ModifyStyle(0, WS_THICKFRAME); + + CPropertySheet::OnSize(nType, cx, cy); + + PostMessage(WM_RESIZEPAGE); +} + +afx_msg LRESULT CCLInterceptConfigSheet::OnResizePage(WPARAM wParam, LPARAM lParam) +{ + Invalidate(); + return 0; +} + +BOOL CCLInterceptConfigSheet::OnNotify(WPARAM wParam, LPARAM lParam, LRESULT* pResult) +{ + NMHDR* pnmh = (LPNMHDR) lParam; + + // the sheet resizes the page whenever it is activated + // so we need to resize it to what we want + if (TCN_SELCHANGE == pnmh->code) { + // user-defined message needs to be posted because page must + // be resized after TCN_SELCHANGE has been processed + PostMessage (WM_RESIZEPAGE); + } + + return CPropertySheet::OnNotify(wParam, lParam, pResult); +} + +void CCLInterceptConfigSheet::OnGetMinMaxInfo(MINMAXINFO FAR* lpMMI) +{ + lpMMI->ptMinTrackSize.x = m_baseSize.x; + lpMMI->ptMinTrackSize.y = m_baseSize.y; + CPropertySheet::OnGetMinMaxInfo(lpMMI); +} + +// Message map for the property sheet +BEGIN_MESSAGE_MAP( CCLInterceptConfigSheet, CPropertySheet ) + ON_MESSAGE (WM_RESIZEPAGE, OnResizePage) + ON_WM_GETMINMAXINFO() + ON_BN_CLICKED(ID_APPLY_NOW, OnApplyNow) + ON_BN_CLICKED(IDOK, OnOK) +END_MESSAGE_MAP() + +//======================================================================== + +LONG WINAPI WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) +{ + return DefWindowProc(hWnd, msg, wParam, lParam); +} + +BOOL GetPlatformInfo() +{ + return TRUE; +} + +CLInterceptConfigApp::CLInterceptConfigApp(void) +{ + // create a mutex to ensure only one instance of the OpenCL config app + hMutex = CreateMutex(NULL, FALSE, L"CLInterceptConfig"); + mutexState = GetLastError(); +} + 
+CLInterceptConfigApp::~CLInterceptConfigApp(void) +{ + // release the mutex + if (hMutex) + { + CloseHandle(hMutex); + hMutex = NULL; + } +} + +BOOL CLInterceptConfigApp::IsAnotherInstanceRunning(void) +{ + return (ERROR_ALREADY_EXISTS == mutexState); +} + +static BOOL CheckHKLMRegistryKey() +{ + HKEY key; + + LSTATUS success = RegOpenKeyEx( + HKEY_LOCAL_MACHINE, + REGISTRY_KEY, + 0, + KEY_READ, + &key ); + if( success == ERROR_SUCCESS ) + { + RegCloseKey(key); + } + + return success == ERROR_SUCCESS; +} + +BOOL CLInterceptConfigApp::InitInstance(void) +{ + if( IsAnotherInstanceRunning() == TRUE ) + { + MessageBox( + NULL, + L"The Intercept Layer for OpenCL Applications Configuration App is already Running!", + L"Error!", + MB_OK); + + AfxGetApp()->ExitInstance(); + return FALSE; + } + + if( CheckHKLMRegistryKey() == TRUE ) + { + MessageBox( + NULL, + L"The Intercept Layer for OpenCL Applications now stores its registry keys " + L"in HKEY_CURRENT_USER, but it appears as though there are registry keys in " + L"HKEY_LOCAL_MACHINE. 
To avoid confusion it is strongly recommended to " + L"remove the old registry keys in HKEY_LOCAL_MACHINE!", + L"Warning", + MB_OK); + } + + CCLInterceptConfigSheet cliSheet; + m_pMainWnd = &cliSheet; + + cliSheet.DoModal(); + + return FALSE; +} + +CLInterceptConfigApp cliConfigApp; diff --git a/config/CLIConfig.h b/config/CLIConfig.h new file mode 100644 index 00000000..bdd91c80 --- /dev/null +++ b/config/CLIConfig.h @@ -0,0 +1,170 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#pragma once + +// MFC include files +#include +#include +#include +#include + +#include + +#include "resource.h" + +#define WM_RESIZEPAGE WM_USER + 111 + +const TCHAR* REGISTRY_KEY = L"SOFTWARE\\INTEL\\IGFX\\CLINTERCEPT"; + +#include +#include "envVars.h" + +enum ICON_STATE +{ + ICON_STATE_NONDEFAULT = 0, + ICON_STATE_MODIFIED_NONDEFAULT = 1, + ICON_STATE_MODIFIED_DEFAULT = 2, + ICON_STATE_DEFAULT = 3, + ICON_STATE_SEPARATOR = 4, + NUM_ICON_STATES +}; + +struct CVariableState +{ + CVariableState(); + + std::vector CurrentIconState; + std::vector CurrentIntValue; + std::vector CurrentStringValue; + + void SetDefaultStates(); + void UpdateIconStates(); +}; + +class CControlsPage : public CPropertyPage +{ +public: + CControlsPage( CVariableState* pVariableState ); + + void ApplyChanges(); + +private: + CVariableState* m_pVariableState; + + CImageList m_ImageList; + CListCtrl* m_pListCtrl; + + int m_SelectedItem; + + void ResetDefaults(); + + void ReadSettingsFromRegistry(); + void WriteSettingsToRegistry() const; + void WriteSettingsToRegistryHelper( HKEY key ) const; + + void UpdateIcons() const; + + void UpdateVarState(int vIndex); + void UpdateControl(int vIndex); + void UpdateIcon(int vIndex) const; + + BOOL OnInitDialog(); + + afx_msg void OnVarChangeBool(); + afx_msg void OnVarChangeEdit(); + afx_msg void OnSelectionChange(); + afx_msg void OnNotify( NMHDR * pNotifyStruct, LRESULT * result ); + + DECLARE_MESSAGE_MAP() +}; + +class CAboutPage : public CPropertyPage +{ +public: + CAboutPage( CVariableState* pVariableState ); + +private: + CVariableState* m_pVariableState; + + std::vector m_Platforms; + std::vector m_Devices; + + BOOL OnInitDialog(); + BOOL OnSetActive(); + + afx_msg void OnPlatformListChange(); + afx_msg void OnDeviceListChange(); + + DECLARE_MESSAGE_MAP() +}; + +class CCLInterceptConfigSheet : public CPropertySheet +{ +public: + CVariableState m_VariableState; + + CAboutPage m_AboutPage; + CControlsPage m_UserPage; + + POINT m_baseSize; + 
POINT m_lastSize; + + // constructor + CCLInterceptConfigSheet() : + CPropertySheet( L"Intercept Layer for OpenCL Applications Configuration App"), + m_AboutPage( &m_VariableState ), + m_UserPage( &m_VariableState ) + { + m_baseSize.x = m_baseSize.y = 0; + + AddPage( &m_UserPage ); + AddPage( &m_AboutPage ); + } + + BOOL OnInitDialog(); + void OnSize(UINT nType, int cx, int cy); + BOOL OnNotify(WPARAM wParam, LPARAM lParam, LRESULT* pResult); + + afx_msg void OnOK(); + afx_msg void OnApplyNow(); + afx_msg LRESULT OnResizePage(WPARAM wParam, LPARAM lParam); + afx_msg void OnGetMinMaxInfo(MINMAXINFO FAR* lpMMI); + + DECLARE_MESSAGE_MAP() +}; + +// the main configApp class is derived from a CWinApp +// application class +class CLInterceptConfigApp : public CWinApp +{ +public: + HANDLE hMutex; // to allow only one instance + DWORD mutexState; // + + CLInterceptConfigApp(); + ~CLInterceptConfigApp(); + + BOOL IsAnotherInstanceRunning(); + + virtual BOOL InitInstance(); +}; diff --git a/config/CLIConfig.rc b/config/CLIConfig.rc new file mode 100644 index 00000000..7270fca1 --- /dev/null +++ b/config/CLIConfig.rc @@ -0,0 +1,202 @@ +// Microsoft Visual C++ generated resource script. +// +#include "resource.h" + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. 
+// +#include "afxres.h" +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// English (United States) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) + +#ifdef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// TEXTINCLUDE +// + +1 TEXTINCLUDE +BEGIN + "resource.h\0" +END + +2 TEXTINCLUDE +BEGIN + "#include ""afxres.h""\0" +END + +3 TEXTINCLUDE +BEGIN + "#include ""CLIConfig_version.rc2""\0" +END + +#endif // APSTUDIO_INVOKED + + +///////////////////////////////////////////////////////////////////////////// +// +// Dialog +// + +IDD_ABOUT_PAGE DIALOGEX 0, 0, 393, 301 +STYLE DS_SETFONT | DS_MODALFRAME | DS_3DLOOK | WS_CHILD | WS_CAPTION +CAPTION "Information" +FONT 8, "MS Sans Serif", 0, 0, 0x1 +BEGIN + LTEXT "Platform:",IDC_PLATFORM_LABEL,13,40,28,8 + LTEXT "Device:",IDC_DEVICE_LABEL,13,59,28,8 + LISTBOX IDC_DEVICE_INFO,229,27,150,40,LBS_SORT | LBS_NOINTEGRALHEIGHT | WS_VSCROLL | WS_TABSTOP,WS_EX_CLIENTEDGE + GROUPBOX "Device Info",IDC_DEVICE_INFO_FRAME,222,15,164,58 + COMBOBOX IDC_PLATFORM_LIST,48,38,167,100,CBS_DROPDOWN | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_DEVICE_LIST,48,58,167,100,CBS_DROPDOWN | WS_VSCROLL | WS_TABSTOP + CONTROL "https://github.com/intel/opencl-intercept-layer",IDC_URL, + "MfcLink",WS_TABSTOP,48,18,167,15 + LTEXT "Link:",IDC_URL_LABEL,13,19,28,8 + LISTBOX IDC_CONFIG_SUMMARY,13,97,366,188,LBS_SORT | LBS_NOINTEGRALHEIGHT | WS_VSCROLL | WS_TABSTOP,WS_EX_CLIENTEDGE + GROUPBOX "Configuration Summary",IDC_CONFIGURATION_SUMMARY_FRAME,7,86,379,208 +END + +IDD_VARS_PAGE DIALOGEX 0, 0, 399, 302 +STYLE DS_SETFONT | WS_CHILD | WS_CAPTION +FONT 8, "MS Sans Serif", 0, 0, 0x1 +BEGIN + CONTROL "List1",IDC_VAR_LIST,"SysListView32",LVS_REPORT | LVS_SINGLESEL | LVS_SHOWSELALWAYS 
| LVS_NOLABELWRAP | LVS_NOCOLUMNHEADER | WS_BORDER | WS_TABSTOP,9,18,274,185 + CONTROL "Enabled",IDC_CHECK_CUR_ENABLED,"Button",BS_AUTOCHECKBOX | NOT WS_VISIBLE | WS_TABSTOP,301,20,67,15 + EDITTEXT IDC_CUR_EDIT,301,20,69,15,ES_MULTILINE | ES_AUTOHSCROLL | NOT WS_VISIBLE + PUSHBUTTON "Reset Defaults",IDC_CMD_DEFAULTS,316,271,68,21 + GROUPBOX "Default Value",IDC_STATIC_DEF,294,46,90,36 + GROUPBOX "Current Value",IDC_STATIC_CUR,294,5,90,36 + GROUPBOX "Control Variable",IDC_VAR_FRAME,4,5,284,204 + GROUPBOX "Description",IDC_STATIC_HELP_GROUP,4,212,284,80 + EDITTEXT IDC_DEF_EDIT,300,60,70,15,ES_AUTOHSCROLL | NOT WS_VISIBLE | WS_DISABLED | NOT WS_TABSTOP + CONTROL "Enabled",IDC_CHECK_DEF_ENABLED,"Button",BS_AUTOCHECKBOX | NOT WS_VISIBLE | WS_DISABLED | WS_TABSTOP,300,60,70,15 + GROUPBOX "Legend for Variable States",IDC_STATIC_L0,294,117,90,92 + ICON IDI_STATE_NONDEFAULT,IDC_STATIC_L4,349,157,21,20,SS_SUNKEN,WS_EX_CLIENTEDGE + ICON IDI_STATE_DEFAULT,IDC_STATIC_L8,349,131,21,20,SS_SUNKEN,WS_EX_CLIENTEDGE + ICON IDI_STATE_MODIFIED,IDC_STATIC_L2,349,183,21,20,SS_SUNKEN,WS_EX_CLIENTEDGE + LTEXT "Default State",IDC_STATIC_L7,297,131,35,19 + LTEXT "Non-Default State",IDC_STATIC_L3,297,157,41,22 + LTEXT "Unsaved Variable",IDC_STATIC_L1,297,183,48,21 + EDITTEXT IDC_EDIT_HELP,8,223,276,61,ES_MULTILINE | ES_AUTOVSCROLL | ES_READONLY | NOT WS_BORDER | WS_VSCROLL | NOT WS_TABSTOP +END + + +///////////////////////////////////////////////////////////////////////////// +// +// DESIGNINFO +// + +#ifdef APSTUDIO_INVOKED +GUIDELINES DESIGNINFO +BEGIN + IDD_ABOUT_PAGE, DIALOG + BEGIN + LEFTMARGIN, 7 + RIGHTMARGIN, 386 + TOPMARGIN, 7 + BOTTOMMARGIN, 294 + END + + IDD_VARS_PAGE, DIALOG + BEGIN + LEFTMARGIN, 4 + RIGHTMARGIN, 384 + BOTTOMMARGIN, 292 + END +END +#endif // APSTUDIO_INVOKED + + +///////////////////////////////////////////////////////////////////////////// +// +// Icon +// + +// Icon with lowest ID value placed first to ensure application icon +// remains consistent on all systems. 
+IDI_LOGO ICON "clintercept_logo.ico" + +IDI_STATE_NONDEFAULT ICON "nondefault.ico" + +IDI_STATE_DEFAULT ICON "default.ico" + +IDI_STATE_DISABLED ICON "disabled.ico" + +IDI_STATE_MODIFIED ICON "modified.ico" + +IDI_STATE_MODIFIED_DEFAULT ICON "modified_default.ico" + +IDI_STATE_MODIFIED_NONDEFAULT ICON "modified_nondefault.ico" + +IDI_STATE_SEPARATOR ICON "separator.ico" + + +///////////////////////////////////////////////////////////////////////////// +// +// Dialog Info +// + +IDD_ABOUT_PAGE DLGINIT +BEGIN + IDC_URL, 0x37c, 204, 0 +0x4d3c, 0x4346, 0x694c, 0x6b6e, 0x555f, 0x6c72, 0x683e, 0x7474, 0x7370, +0x2f3a, 0x672f, 0x7469, 0x7568, 0x2e62, 0x6f63, 0x2f6d, 0x6e69, 0x6574, +0x2f6c, 0x706f, 0x6e65, 0x6c63, 0x692d, 0x746e, 0x7265, 0x6563, 0x7470, +0x6c2d, 0x7961, 0x7265, 0x2f3c, 0x464d, 0x4c43, 0x6e69, 0x5f6b, 0x7255, +0x3e6c, 0x4d3c, 0x4346, 0x694c, 0x6b6e, 0x555f, 0x6c72, 0x7250, 0x6665, +0x7869, 0x3c3e, 0x4d2f, 0x4346, 0x694c, 0x6b6e, 0x555f, 0x6c72, 0x7250, +0x6665, 0x7869, 0x3c3e, 0x464d, 0x4c43, 0x6e69, 0x5f6b, 0x6f54, 0x6c6f, +0x6974, 0x3e70, 0x2f3c, 0x464d, 0x4c43, 0x6e69, 0x5f6b, 0x6f54, 0x6c6f, +0x6974, 0x3e70, 0x4d3c, 0x4346, 0x694c, 0x6b6e, 0x465f, 0x6c75, 0x546c, +0x7865, 0x5474, 0x6f6f, 0x746c, 0x7069, 0x463e, 0x4c41, 0x4553, 0x2f3c, +0x464d, 0x4c43, 0x6e69, 0x5f6b, 0x7546, 0x6c6c, 0x6554, 0x7478, 0x6f54, +0x6c6f, 0x6974, 0x3e70, + 0 +END + + +///////////////////////////////////////////////////////////////////////////// +// +// AFX_DIALOG_LAYOUT +// + +IDD_ABOUT_PAGE AFX_DIALOG_LAYOUT +BEGIN + 0 +END + + +///////////////////////////////////////////////////////////////////////////// +// +// String Table +// + +STRINGTABLE +BEGIN + IDS_USER "User Settings" + IDS_INFO "Information" +END + +#endif // English (United States) resources +///////////////////////////////////////////////////////////////////////////// + + + +#ifndef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the 
TEXTINCLUDE 3 resource. +// +#include "CLIConfig_version.rc2" +///////////////////////////////////////////////////////////////////////////// +#endif // not APSTUDIO_INVOKED + diff --git a/config/CLIConfig_version.rc2 b/config/CLIConfig_version.rc2 new file mode 100644 index 00000000..85b8464a --- /dev/null +++ b/config/CLIConfig_version.rc2 @@ -0,0 +1,62 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +///////////////////////////////////////////////////////////////////////////// +// English (United States) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 2,2,0,0 + PRODUCTVERSION 2,2,0,0 + FILEFLAGSMASK 0x0L +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x0L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "CompanyName", "Intel(R) Corporation" + VALUE "FileDescription", "Intercept Layer for OpenCL(tm) Applications Configuration App" + VALUE "FileVersion", "v2.2" + VALUE "InternalName", "CLIConfig" + VALUE "LegalCopyright", "Copyright(C) Intel Corporation 2018" + VALUE "OriginalFilename", "CLIConfig.exe" + VALUE "ProductName", "Intercept Layer for OpenCL(tm) Applications Configuration App" + VALUE "ProductVersion", "v2.2" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + +#endif // English (United States) resources +///////////////////////////////////////////////////////////////////////////// diff --git a/config/CMakeLists.txt b/config/CMakeLists.txt new file mode 100644 index 00000000..aa8ed0bc --- /dev/null +++ b/config/CMakeLists.txt @@ -0,0 +1,62 @@ +# Copyright (c) 2018 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(CMAKE_MFC_FLAG 2) +add_definitions(-DUNICODE -D_UNICODE) + +include_directories( + ${CMAKE_CURRENT_SOURCE_DIR}/.. +) + +set( CLICONFIG_ICON_FILES + clintercept_logo.ico + default.ico + disabled.ico + modified.ico + modified_default.ico + modified_nondefault.ico + nondefault.ico + separator.ico +) +source_group( Icons FILES + ${CLICONFIG_ICON_FILES} +) + +set( CLICONFIG_SOURCE_FILES + CLIConfig.cpp + CLIConfig.h + CLIConfig.rc + CLIConfig_version.rc2 + envVars.h + resource.h +) +source_group( Source FILES + ${CLICONFIG_SOURCE_FILES} +) + +add_executable(CLIConfig + ${CLICONFIG_ICON_FILES} + ${CLICONFIG_SOURCE_FILES} +) +set_target_properties(CLIConfig PROPERTIES LINK_FLAGS "/SUBSYSTEM:WINDOWS") + +foreach( OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES} ) + install(TARGETS CLIConfig DESTINATION ${OUTPUTCONFIG}/Config CONFIGURATIONS ${OUTPUTCONFIG}) +endforeach( OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES ) diff --git a/config/clintercept_logo.ico b/config/clintercept_logo.ico new file mode 100644 index 00000000..89ad701a Binary files /dev/null and b/config/clintercept_logo.ico differ diff --git a/config/default.ico b/config/default.ico new file mode 100644 index 00000000..3eaafe50 Binary files /dev/null and b/config/default.ico differ diff --git a/config/disabled.ico b/config/disabled.ico new file mode 100644 index 00000000..93719edf Binary files /dev/null and b/config/disabled.ico differ diff --git a/config/envVars.h b/config/envVars.h new file mode 100644 index 00000000..6d362026 --- 
/dev/null +++ b/config/envVars.h @@ -0,0 +1,109 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+*/ + +#pragma once + /* Kind of a variable table row (stored in VarDescription::Type): integer, boolean, string, or separator. */ +enum CONTROL_TYPE +{ + CONTROL_TYPE_INT = 0, + CONTROL_TYPE_BOOL = 1, + CONTROL_TYPE_STRING = 2, + CONTROL_TYPE_SEPARATOR = 3, +}; + /* GetControlType maps a declared control type to a CONTROL_TYPE value. The primary template returns CONTROL_TYPE_INT; the two specializations that follow return CONTROL_TYPE_BOOL and CONTROL_TYPE_STRING. NOTE(review): the template argument lists are not visible in this view - presumably the specializations are for bool and std::string; confirm. */ +template +static CONTROL_TYPE GetControlType() +{ + return CONTROL_TYPE_INT; +} + +template<> +static CONTROL_TYPE GetControlType() +{ + return CONTROL_TYPE_BOOL; +} + +template<> +static CONTROL_TYPE GetControlType() +{ + return CONTROL_TYPE_STRING; +} + /* GetStringDefault yields the default string of a control: the primary template ignores its argument and returns an empty string, while the specialization taking const char* returns the initializer itself. */ +template +static const char* GetStringDefault( T dummy ) +{ + return ""; +} +template<> +static const char* GetStringDefault( const char* init ) +{ + return init; +} + /* GetIntDefault yields the default integer of a control: the primary template returns the initializer, while the specialization taking const char* ignores it and returns 0. */ +template +static const int GetIntDefault( T init ) +{ + return init; +} +template<> +static const int GetIntDefault( const char* dummy ) +{ + return 0; +} + /* One row of the variable table: control kind, name, default string value, default integer value and help text. */ +struct VarDescription +{ + CONTROL_TYPE Type; + const std::string Name; + const std::string defStrValue; + int defIntValue; + const std::string HelpText; +}; + /* CLI_CONTROL expands to one VarDescription initializer followed by a comma: the kind comes from GetControlType<_type>, the name is the stringized _name, both defaults are derived from _init, and _desc is the help text. */ +#define CLI_CONTROL( _type, _name, _init, _desc ) \ +{ \ + GetControlType<_type>(), \ + #_name, \ + GetStringDefault(_init), \ + GetIntDefault(_init), \ + _desc, \ +}, + /* CLI_CONTROL_SEPARATOR expands to a CONTROL_TYPE_SEPARATOR row with the stringized _name, empty strings and a zero integer default. */ +#define CLI_CONTROL_SEPARATOR( _name ) \ +{ \ + CONTROL_TYPE_SEPARATOR, \ + #_name, \ + "", \ + 0, \ + "", \ +}, + /* Variable table: a hand-written BreakOnLoad row followed by the rows contributed by the included controls.h. NOTE(review): controls.h is not visible here - presumably it is a list of CLI_CONTROL and CLI_CONTROL_SEPARATOR invocations; confirm. */ +static const VarDescription cVars[] = +{ + { CONTROL_TYPE_BOOL, "BreakOnLoad", "", 0, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will break into the debugger when the DLL is loaded." 
}, +#include "..\Src\controls.h" +}; + /* Number of rows in cVars, computed from the array size. */ +const int cNumVars = sizeof(cVars) / sizeof(cVars[0]); + /* The row-building macros are undefined again once the table has been defined. */ +#undef CLI_CONTROL +#undef CLI_CONTROL_SEPARATOR diff --git a/config/modified.ico b/config/modified.ico new file mode 100644 index 00000000..f7cd6b2d Binary files /dev/null and b/config/modified.ico differ diff --git a/config/modified_default.ico b/config/modified_default.ico new file mode 100644 index 00000000..e131b5d6 Binary files /dev/null and b/config/modified_default.ico differ diff --git a/config/modified_nondefault.ico b/config/modified_nondefault.ico new file mode 100644 index 00000000..0155bfc7 Binary files /dev/null and b/config/modified_nondefault.ico differ diff --git a/config/nondefault.ico b/config/nondefault.ico new file mode 100644 index 00000000..0c6f0a6c Binary files /dev/null and b/config/nondefault.ico differ diff --git a/config/resource.h b/config/resource.h new file mode 100644 index 00000000..0c2e8999 --- /dev/null +++ b/config/resource.h @@ -0,0 +1,56 @@ +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. 
+// Used by CLIConfig.rc +// +#define IDD_VARS_PAGE 101 +#define IDD_ABOUT_PAGE 105 +#define IDI_LOGO 300 +#define IDI_STATE_NONDEFAULT 301 +#define IDI_STATE_MODIFIED_NONDEFAULT 302 +#define IDI_STATE_MODIFIED_DEFAULT 303 +#define IDI_STATE_DEFAULT 304 +#define IDI_STATE_DISABLED 305 +#define IDI_STATE_MODIFIED 306 +#define IDS_USER 503 +#define IDS_INFO 504 +#define IDI_ICON1 519 +#define IDI_STATE_SEPARATOR 519 +#define IDC_STATIC_L0 1003 +#define IDC_VAR_LIST 2000 +#define IDC_CUR_EDIT 2001 +#define IDC_DEF_EDIT 2002 +#define IDC_CHECK_DEF_ENABLED 2005 +#define IDC_CMD_DEFAULTS 2006 +#define IDC_VAR_FRAME 6000 +#define IDC_DEVICE_INFO 6000 +#define IDC_CHECK_CUR_ENABLED 6001 +#define IDC_CONFIG_SUMMARY 6001 +#define IDC_STATIC_CUR 6002 +#define IDC_STATIC_DEF 6003 +#define IDC_STATIC_L1 6004 +#define IDC_STATIC_L2 6005 +#define IDC_STATIC_L3 6006 +#define IDC_STATIC_L4 6007 +#define IDC_STATIC_L7 6010 +#define IDC_STATIC_L8 6011 +#define IDC_STATIC_HELP_GROUP 6012 +#define IDC_EDIT_HELP 6013 +#define IDC_PLATFORM_LABEL 6016 +#define IDC_DEVICE_LABEL 6017 +#define IDC_URL_LABEL 6018 +#define IDC_PLATFORM_LIST 6019 +#define IDC_DEVICE_LIST 6020 +#define IDC_DEVICE_INFO_FRAME 6021 +#define IDC_URL 6022 +#define IDC_CONFIGURATION_SUMMARY_FRAME 6023 + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 520 +#define _APS_NEXT_COMMAND_VALUE 40005 +#define _APS_NEXT_CONTROL_VALUE 6024 +#define _APS_NEXT_SYMED_VALUE 600 +#endif +#endif diff --git a/config/separator.ico b/config/separator.ico new file mode 100644 index 00000000..51737910 Binary files /dev/null and b/config/separator.ico differ diff --git a/docs/build.md b/docs/build.md new file mode 100644 index 00000000..af52a937 --- /dev/null +++ b/docs/build.md @@ -0,0 +1,67 @@ +# How to Build the Intercept Layer for OpenCL Applications + +CMake is now the primary mechanism to build the Intercept Layer for OpenCL +Applications. 
The CMakefile has been tested on Windows (VS2013 and newer), +Linux, and OSX. + +## Tools + +You will need: + +* CMake +* A C++ Compiler + +Windows developers can get CMake [here][CMake]. For Linux, CMake is likely +available via your distribution package manager. For example, to install +CMake on Ubuntu all that is needed is: + + sudo apt-get install cmake-gui + +## Recommendations + +For Windows, recommended folders for "where to build the binaries" are `_bin32` +(for 32-bit DLLs) or `_bin64` (for 64-bit DLLs). Note that for Windows, these +directories are only used for project files and intermediate files, and final +output files are built in the `builds` directory. + +For Linux, recommended folders are `_bin32`, `_bin64`, or just plain `_bin`. + +For most 32-bit Windows and Linux usages, you can simply run: + + cmake .. + +in one of the `_bin` directories described above, then open the generated solution +file or run make with the generated Makefile. + +For 64-bit Windows you'll need to specify a 64-bit generator manually, for +example: + + cmake.exe -G "Visual Studio 14 2015 Win64" .. + +If this doesn't work, or if you'd rather not bother with command lines, the CMake +gui (`cmake-gui`) or `ccmake` is always an option. + +## Android + +Android support is experimental, has not been ported to CMake, and is not regularly +tested. Build instructions for Android are: + + cd + source build/envsetup.sh + lunch + export TOP=`pwd` + export ANDROID_SRC=`pwd` + cd + mm + +The shared library will be named: clIntercept.so and placed in +`/out/target/product//system/lib`. Copy it to the target +manually. + +--- + +\* Other names and brands may be claimed as the property of others. 
+ +Copyright (c) 2018, Intel(R) Corporation + +[CMake]: https://cmake.org diff --git a/docs/chrome_tracing.md b/docs/chrome_tracing.md new file mode 100644 index 00000000..ac6e6868 --- /dev/null +++ b/docs/chrome_tracing.md @@ -0,0 +1,67 @@ +# Using the Intercept Layer for OpenCL Applications with Chrome Tracing + +The Intercept Layer for OpenCL Applications includes support for generating +JSON files compatible with the Google Chrome built-in profiler front end. +This document describes how to use the Intercept Layer for OpenCL Applications +to visualize how an OpenCL application executes using Chrome Tracing. + +## Background and Setup + +The Chrome Tracing Format is described [here][chrome_tracing_format]. + +To start up the Chrome Tracing front end, open Chrome and enter +`chrome://tracing` as your "URL". This should give you a window that +looks like this one: + +![Empty Chrome Tracing Window](images/chrome_tracing_empty.png) + +## Configuring Chrome Tracing + +There are (currently) two Chrome Tracing-related controls for the Intercept +Layer for OpenCL Applications: + +* `ChromeCallLogging`: This is the control for tracing OpenCL host APIs. + It will plot OpenCL calls for each thread of the host application, + similar to those dumped when `CallLogging` is enabled. +* `ChromePerformanceTiming`: This is the control for tracing OpenCL + device commands. It will plot OpenCL commands for each command queue + created by the application, similar to those dumped when + `DevicePerformanceTiming` is enabled. + +## Collecting Chrome Tracing Data + +After setting one of these two controls (or both!), run your application, +and you should see a "CLIntercept_trace.json" file in your CLIntercept_Dump +directory. + +## Visualizing Chrome Tracing Data + +After collecting a "CLIntercept_trace.json" file, simply click the "load" +button in the "chrome://tracing" UI, or drag your file into Chrome. 
+ +If all goes well you will see a timegraph like this one: + +![Chrome Tracing Example](images/chrome_tracing_example.png) + +You can navigate around the timegraph with 'wasd' controls similar to many +popular games: `w` zooms in, `s` zooms out, `a` goes backwards in time, +and `d` moves forwards in time. + +Let's zoom in a bit and look at what's in the timegraph: + +![Chrome Tracing Detail](images/chrome_tracing_detail.png) + +## Overhead + +Empirically, the overhead of Chrome Tracing is very low. The difference in +scores between a run of LuxMark without Chrome Tracing vs. enabling +`ChromeCallLogging` and `ChromePerformanceTiming` was less than 1%, and the +trace file size for the 2+ minutes of execution was 26MB. + +--- + +\* Other names and brands may be claimed as the property of others. + +Copyright (c) 2018, Intel(R) Corporation + +[chrome_tracing_format]: https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview diff --git a/docs/controls.md b/docs/controls.md new file mode 100644 index 00000000..b6e831da --- /dev/null +++ b/docs/controls.md @@ -0,0 +1,620 @@ +# How to Use the Intercept Layer for OpenCL Applications + +This file is automatically generated using the script `generate_controls_doc.py`. +Please do not edit it manually! + +By default, the Intercept Layer for OpenCL Applications will not modify any OpenCL +calls. You may notice some status messages being printed to stderr, but otherwise +your application should run exactly as it does without the Intercept Layer for +OpenCL Applications. + +## Controls + +The Intercept Layer for OpenCL Applications is controlled using the Windows +registry, Linux configuration files, or environment variables on all OSes. 
+ +### Windows Registry + +On Windows, the Intercept Layer for OpenCL Applications reads its registry keys +from: + + HKEY_CURRENT_USER\SOFTWARE\INTEL\IGFX\CLIntercept + +This is the recommended registry location as it has several advantages over +HKEY_LOCAL_MACHINE: modifying the registry keys does not require Administrator +access, registry keys do not need to be set in multiple places, and each user +can set their own registry keys without affecting other users. + +For backwards compatibility, the Intercept Layer for OpenCL Applications +will still read registry keys from: + + // For 32-bit systems, or 64-bit applications on a 64-bit system: + HKEY_LOCAL_MACHINE\SOFTWARE\INTEL\IGFX\CLIntercept + + // For 32-bit applications on a 64-bit system: + HKEY_LOCAL_MACHINE\SOFTWARE\WoW6432Node\INTEL\IGFX\CLIntercept + +If a registry key is set in both HKCU and HKLM, the setting in HKCU will "win". + +### Linux Configuration Files + +On Linux, the Intercept Layer for OpenCL Applications will read control values +from ~/clintercept.conf. Controls may be set by putting the control on its own +line, followed by an equals sign, followed by the value you'd like to set the +option to. Lines that begin with a semi-colon(";"), a hash mark ("#"), or a +C++-style comment ("//") are ignored. For example, to enable CallLogging, put +a line in your ~/clintercept.conf that looks like: + + // Enable CallLogging: + CallLogging=1 + +### Environment Variables + +The Intercept Layer for OpenCL Applications may be controlled using environment variables. +The name of the environment variable control is "CLI_" followed by the control name, to +distinguish controls from other environment variables, and to make it easy to +list all of the environment variable controls. 
So, to enable CallLogging, you +could type: + + export CLI_CallLogging=1 + +To disable CallLogging, you could type: + + unset CLI_CallLogging + +To list all environment variable controls, you could type: + + env | grep CLI_ + +### Setup and Loading Controls + +##### `DllName` (string) + +Used to control the DLL or Shared Library that the Intercept Layer for OpenCL Applications loads to make real OpenCL calls. If present, only this DLL name is loaded. If omitted, the Intercept Layer for OpenCL Applications tries to load the real OpenCL DLL from file names in this order: + +- real_OpenCL.dll (anywhere in the system path) +- %WINDIR%\SysWOW64\OpenCL.dll (32-bit DLLs only) +- %WINDIR%\System32\OpenCL.dll + +##### `BreakOnLoad` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will break into the debugger when it is loaded. + +### Logging Controls + +##### `AppendFiles` (bool) + +By default, the Intercept Layer for OpenCL Applications log files will be created from scratch when the intercept DLL is loaded, and any Intercept Layer for OpenCL Applications report files will be created from scratch when the intercept DLL is unloaded. If AppendFiles is set to a nonzero value, the Intercept Layer for OpenCL Applications will append to an existing file instead of recreating it. This can be useful if an application loads and unloads the intercept DLL multiple times, or to simply preserve log or report data from run-to-run. + +##### `LogToFile` (bool) + +If set to a nonzero value, sends log information to the file "clintercept\_log.txt" instead of to stderr. The log file will be placed in the directory "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". + +##### `LogToDebugger` (bool) + +If set to a nonzero value, sends log information to the debugger instead of to stderr. If both LogToFile and LogToDebugger are nonzero then log information will be sent both to a file and to the debugger. 
+ +##### `LogIndent` (int) + +Indents each log entry by this many spaces. + +##### `BuildLogging` (bool) + +If set to a nonzero value, logs the program build log after each call to clBuildProgram(). This will likely only function correctly for synchronous builds. Note that the build log is logged regardless of whether the program built successfully, which allows compiler warnings to be logged for successful compiles. + +##### `PreferredWorkGroupSizeMultipleLogging` (bool) + +If set to a nonzero value, logs the preferred work group size multiple for each kernel after each call to clCreateKernel(). On some devices this is the equivalent of the SIMD size for this kernel. + +##### `CallLogging` (bool) + +If set to a nonzero value, logs function entry and exit information for every OpenCL call. This can be used to easily determine which OpenCL call is causing an application to crash or fail or if a crash occurs outside of an OpenCL call. This setting is best used with LogToFile or LogToDebugger as it can generate a lot of log data. + +##### `CallLoggingEnqueueCounter` (bool) + +If set to a nonzero value, logs the enqueue counter in addition to function entry and exit information for every OpenCL call. This can be used to determine appropriate limits for DumpBuffersMinEnqueue, DumpBuffersMaxEnqueue, DumpImagesMinEnqueue, or DumpImagesMaxEnqueue. If CallLogging is disabled then this control will have no effect. + +##### `CallLoggingThreadId` (bool) + +If set to a nonzero value, logs the ID of the calling thread in addition to function entry and exit information for every OpenCL call. This can be helpful when debugging multi-threading issues. + +##### `CallLoggingThreadNumber` (bool) + +If set to a nonzero value, logs the symbolic number of the calling thread in addition to function entry and exit information for every OpenCL call. This can be helpful when debugging multi-threading issues. 
+ +##### `CallLoggingElapsedTime` (bool) + +If set to a nonzero value, logs the elapsed time in microseconds in addition to function entry and exit information for every OpenCL call, starting from the time the intercept DLL is loaded. + +##### `ITTCallLogging` (bool) + +If set to a nonzero value, logs function entry and exit information for every OpenCL call using the ITT APIs. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support. + +##### `ChromeCallLogging` (bool) + +If set to a nonzero value, logs function entry and exit information for every OpenCL call to a JSON file that may be used for Chrome Tracing. + +##### `ErrorLogging` (bool) + +If set to a nonzero value, logs all OpenCL errors and the function name that caused the error. + +##### `ErrorAssert` (bool) + +If set to a nonzero value, breaks into the debugger when an OpenCL error occurs. + +##### `ContextCallbackLogging` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will install a callback for every context and log any calls to the context callback. The application's context callback, if any, will be invoked after the Intercept Layer for OpenCL Applications' context callback. + +##### `ContextHintLevel` (cl_uint) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will attempt to create contexts with the CL\_CONTEXT\_SHOW\_DIAGNOSTICS\_INTEL property set to the specified value. If this property is specified by the application, the Intercept Layer for OpenCL Applications will overwrite it with the specified value, otherwise the property and the specified value will be added to the list of context creation properties. This functionality is only available for OpenCL implementations that support the cl\_intel\_driver\_diagnostics extension. If this functionality is not available in the underlying OpenCL implementation, the unmodified list of context properties will be used to create the context instead. 
More information about this feature, including valid values and their meaning, can be found in the cl\_intel\_driver\_diagnostics extension specification. + +##### `EventCallbackLogging` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will install its own callback for every event callback and log the call to the event callback. The application's event callback will be invoked after the Intercept Layer for OpenCL Applications' event callback. + +##### `CLInfoLogging` (bool) + +If set to a nonzero value, logs information about the platforms and devices in the system on the first call to clGetPlatformIDs(). + +##### `LogDir` (string) + +If set, the Intercept Layer for OpenCL Applications will emit logs to this directory instead of the default log directory. + +### Performance Timing Controls + +##### `HostPerformanceTiming` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will track the minimum, maximum, and average host CPU time for each OpenCL entry point. When the process exits, this information will be printed to the file "clIntercept\_report.txt" in the directory "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". + +##### `DevicePerformanceTiming` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will add event profiling to track the minimum, maximum, and average device time for each OpenCL command. This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING\_ENABLED and may increment the reference count for application events. When the process exits, this information will be printed to the file "clIntercept\_report.txt" in the directory "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". 
+ +##### `DevicePerformanceTimeHashTracking` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels from programs with different hashes for the purpose of device performance timing. + +##### `DevicePerformanceTimeKernelInfoTracking` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels using information such as the kernel's Preferred Work Group Size Multiple (AKA SIMD size). + +##### `DevicePerformanceTimeGWSTracking` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different global work sizes for the purpose of device performance timing. + +##### `DevicePerformanceTimeLWSTracking` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will distinguish between OpenCL NDRange kernels with different local work sizes for the purpose of device performance timing. + +##### `DevicePerformanceTimingSkipUnmap` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will skip device performance timing for unmap operations. This is a workaround for a bug in some OpenCL implementations, where querying events created from unmap operations results in driver crashes. + +##### `HostPerformanceTimeLogging` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the host elapsed time for each OpenCL entry point. This can be useful to identify OpenCL entry points that execute significantly slower or faster than average on the host. + +##### `DevicePerformanceTimeLogging` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the device execution time deltas for each OpenCL command. This can be useful to identify specific OpenCL commands that execute significantly slower or faster than average on the device. 
If DevicePerformanceTiming is disabled then this control will have no effect. + +##### `DevicePerformanceTimelineLogging` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will log the device execution times for each OpenCL command. This can be useful to visualize the execution timeline of OpenCL commands that execute on the device. If DevicePerformanceTiming is disabled then this control will have no effect. + +##### `DevicePerfCounterCustom` (string) + +If set, the Intercept Layer for OpenCL Applications will collect MDAPI metrics for the Metric Set corresponding to this value for each OpenCL command. Frequently used Metric Sets include: ComputeBasic, ComputeExtended, L3\_1, Sampler. The output file has the potential to be very big depending on the work load. This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING\_ENABLED and may increment the reference count for application events. When the process exits, this information will be printed to the file "clintercept\_perfcounter\_dump\_\.txt" in the directory "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support. + +##### `DevicePerfCounterFile` (string) + +Full path to a custom MDAPI file. This can be used to add custom Metric Sets. + +##### `DevicePerfCounterTiming` (bool) + +If set to a nonzero value and DevicePerfCounterCustom is set, the Intercept Layer for OpenCL Applications will enable Intel GPU Performance Counters to track the minimum, maximum, and average performance counter deltas for each OpenCL command. This operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING\_ENABLED and may increment the reference count for application events. 
When the process exits, this information will be printed to the file "clIntercept\_report.txt" in the directory "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". This feature will only function if the Intercept Layer for OpenCL Applications is built with MDAPI support. + +##### `ITTPerformanceTiming` (bool) + +[Note: This control makes ITT calls, but they appear to do nothing!] If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate ITT-compatible performance timing data. Similar to DevicePerformanceTiming, this operation may be fairly intrusive and may have side effects; in particular it forces all command queues to be created with PROFILING\_ENABLED and may increment the reference count for application events. ITTPerformanceTiming will also silently create OpenCL command queues that support advanced performance counters if this functionality is available. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support. + +##### `ITTShowOnlyExecutingEvents` (bool) + +[Note: This control makes ITT calls, but they appear to do nothing!] By default, when ITTPerformanceTiming is enabled, the Intercept Layer for OpenCL Applications will generate ITT-compatible information for all states of an OpenCL event: when the command was queued, when it was submitted, when it started executing, and when it finished executing. If ITTShowOnlyExecutingEvents is set to a nonzero value, the Intercept Layer for OpenCL Applications will only generate ITT-compatible instrumentation when an event begins executing and when an event ends executing. Since no information will be displayed about when a command is queued or submitted, this can sometimes make it easier to identify times when the device is idle. This feature will only function if the Intercept Layer for OpenCL Applications is built with ITT support. 
+ +##### `ChromePerformanceTiming` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will generate device performance timing information in a JSON file that may be used for Chrome Tracing. + +### Controls for Dumping and Injecting Programs and Build Options + +##### `OmitProgramNumber` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will omit the program number from dumped file names and hash tracking. This can produce deterministic results even if programs are built in a non-deterministic order (say, by multiple threads). + +##### `SimpleDumpProgramSource` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the last string(s) passed to clCreateProgramWithSource() to the file kernel.cl, and the last program options passed to clBuildProgram() to the file kernel.txt. These files will be dumped to the application's working directory. If an application fails to compile a program and exits the program immediately after detecting a compile failure, SimpleDumpProgramSource may be all that is needed to identify the program and program options that are failing to compile. + +##### `DumpProgramSourceScript` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every string passed to clCreateProgramWithSource() to its own file. The directory names and file names for the dumped files match the directory names and file names expected by a modified OpenCL conformance test script to capture kernels. This setting overrides SimpleDumpProgramSource, and if it is set to a nonzero value then the value of SimpleDumpProgramSource is ignored. + +##### `DumpProgramSource` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every string passed to clCreateProgramWithSource() to its own file. The files will be dumped to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". The filename will have the form "CLI\_\\_\\_source.cl". 
Program options that are passed to clBuildProgram() or clCompileProgram() will be dumped to the same directory with the filename "CLI\_\\_\\_\\_options.txt". This setting can be used for information purposes to see all kernels that are used by an application or to dump programs for program injection. This setting overrides DumpProgramSourceScript and SimpleDumpProgramSource, and if it is set to a nonzero value then the values of DumpProgramSourceScript and SimpleDumpProgramSource will be ignored. + +##### `DumpInputProgramBinaries` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program binary that is passed to clCreateProgramWithBinary() to its own file. The files will be dumped to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". The filename will have the form "CLI\_\\_\\_\.bin". This is the input program binary provided by the application, and not a device binary queried from the OpenCL implementation. In particular, note that it may be a SPIR 1.2 binary. + +##### `DumpProgramBinaries` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program binary that was successfully built with clBuildProgram() to its own file. The files will be dumped to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". The filename will have the form "CLI\_\\_\\_\\_\.bin". Program options that are passed to clBuildProgram() or clCompileProgram() will be dumped to the same directory with the filename "CLI\_\\_\\_\\_options.txt". This setting can be used to examine compiled program binaries or to dump program binaries for program binary injection. Note that this option dumps the output binary, which is a device binary, after calling clBuildProgram(). + +##### `DumpProgramSPIRV` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump every program IL binary passed to clCreateProgramWithIL() to its own file. 
The files will be dumped to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". The filename will have the form "CLI\_\\_\\_0000.spv" - for now at least!. Program options that are passed to clBuildProgram() or clCompileProgram() will be dumped to the same directory with the filename "CLI\_\\_\\_\\_options.txt". This setting can be used for information purposes to see all kernels that are used by an application or to dump SPIRV programs for SPIRV injection. + +##### `InjectProgramSource` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel source to clCreateProgramWithSource() and/or potentially modified options to clBuildProgram(). + +##### `InjectProgramBinaries` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel binaries via clCreateProgramWithBinary() in place of program text for each call to clCreateProgramWithSource(). This is typically done to reduce program compilation time or to use known good program binaries. + +##### `RejectProgramBinaries` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will reject kernel binaries passed via clCreateProgramWithBinary() and return CL\_INVALID\_BINARY. This can be used to force an application to re-compile program binaries from source. + +##### `InjectProgramSPIRV` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to inject potentially modified kernel SPIR-V binaries via clCreateProgramWithIL() in place of program text for each call to clCreateProgramWithSource(). + +##### `PrependProgramSource` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will look to prepend kernel code from a file to the application provided kernel source passed to clCreateProgramWithSource(). 
The Intercept Layer for OpenCL Applications will look for kernel source to prepend in the directory "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". The files that are searched for are (in order) "CLI\_\\_\\_prepend.cl", "CLI\_\\_prepend.cl", and "CLI\_prepend.cl". + +##### `AppendBuildOptions` (string) + +If set, the Intercept Layer for OpenCL Applications will add these build options to the end of any application provided or injected build options for each call to clBuildProgram(). + +##### `DumpProgramBuildLogs` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump build logs for every device a program is built for to a separate file. The files will be dumped to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". The filename will have the form "CLI\_\\_\\_\\_\\_build\_log.txt". + +### Controls for Automatically Creating SPIR-V Modules + +##### `AutoCreateSPIRV` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will automatically create SPIR-V modules by invoking CLANG each time a program is built. The files will be dumped to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\". The filename will have the form "CLI\_\\_\\_\.spv". Because invoking CLANG requires a file containing the OpenCL C source, setting this option implicitly sets DumpProgramSource as well. Additionally, this feature is not available for injected program source. + +##### `SPIRVClang` (string) + +The clang executable used to compile an OpenCL C program to a SPIR-V module. This can be an executable in the system path, a relative path, or a full absolute path. + +##### `SPIRVCLHeader` (string) + +The OpenCL header file used to compile an OpenCL C program to a SPIR-V module. This must be a relative path or a full absolute path. + +##### `SPIRVDis` (string) + +The spirv-dis executable used to optionally disassemble the compiled SPIR-V module to a SPIR-V text representation. 
This can be an executable in the system path, a relative path, or a full absolute path. + +##### `DefaultOptions` (string) + +This is the list of options that is implicitly passed to CLANG to build a non-OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options. + +##### `OpenCL2Options` (string) + +This is the list of options that is implicitly passed to CLANG to build an OpenCL 2.0 SPIR-V module. Any application-provided build options will be appended to these build options. + +### Controls for Dumping Buffers and Images + +##### `DumpArgumentsOnSet` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump the argument value on calls to clSetKernelArg(). Arguments are dumped as raw binary data to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\\\SetKernelArg". The filenames will have the form "SetKernelArg\_\\_Kernel\_\\_Arg\_\.bin". + +##### `DumpBuffersAfterCreate` (bool) + +If set, the Intercept Layer for OpenCL Applications will dump buffers to a file after creation. This control still honors the enqueue counter limits, even though no enqueues are involved during buffer creation. Currently only works for cl\_mem buffers created from host pointers. + +##### `DumpBuffersAfterMap` (bool) + +If set, the Intercept Layer for OpenCL Applications will dump the contents of a buffer to a file after the buffer is mapped. Only valid if the buffer is NOT mapped with CL\_MAP\_WRITE\_INVALIDATE\_REGION. If the buffer was mapped non-blocking, this may insert a clFinish() into the command queue, which may have functional or performance implications. + +##### `DumpBuffersBeforeUnmap` (bool) + +If set, the Intercept Layer for OpenCL Applications will dump the contents of a buffer to a file immediately before the buffer is unmapped. 
This is done by inserting a blocking clEnqueueMapBuffer() (and matching clEnqueueUnmapMemObject()) into the command queue, which may have functional or performance implications. + +##### `DumpBuffersBeforeEnqueue` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump buffers before calls to clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the kernel being enqueued are dumped. Buffers are dumped as raw binary data to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\\\memDumpPreEnqueue". The filenames will have the form "Enqueue\_\\_Kernel\_\\_Arg\_\\_Buffer\_\.bin". + +##### `DumpBuffersAfterEnqueue` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump buffers after calls to clEnqueueNDRangeKernel(). Only buffers that are kernel arguments for the kernel being enqueued are dumped. Buffers are dumped as raw binary data to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\\\memDumpPostEnqueue". The filenames will have the form "Enqueue\_\\_Kernel\_\\_Arg\_\\_Buffer\_\.bin". Note that this is the same naming convention as with DumpBuffersBeforeEnqueue, so the changes resulting from an enqueue can be determined by diff'ing the preEnqueue folder with the postEnqueue folder. + +##### `DumpBuffersForKernel` (string) + +If set, the Intercept Layer for OpenCL Applications will only dump buffers when the specified kernel is enqueued. This control is ignored unless DumpBuffersBeforeEnqueue or DumpBuffersAfterEnqueue are enabled. + +##### `DumpImagesBeforeEnqueue` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump images before calls to clEnqueueNDRangeKernel(). Only images that are kernel arguments for the kernel being enqueued are dumped. Images are dumped as raw binary data to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\\\memDumpPreEnqueue". The filenames will have the form "Enqueue\_\\_Kernel\_\\_Arg\_\\_Image\_\\_\x\x\\_\bpp.raw". 
+ +##### `DumpImagesAfterEnqueue` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will dump images after calls to clEnqueueNDRangeKernel(). Only images that are kernel arguments for the kernel being enqueued are dumped. Images are dumped as raw binary data to "%SYSTEMDRIVE%\\Intel\\CLIntercept\_Dump\\\\\memDumpPostEnqueue". The filenames will have the form "Enqueue\_\\_Kernel\_\\_Arg\_\\_Image\_\\_\x\x\\_\bpp.raw". Note that this is the same naming convention as with DumpImagesBeforeEnqueue, so the changes resulting from an enqueue can be determined by diff'ing the preEnqueue folder with the postEnqueue folder. + +##### `DumpImagesForKernel` (string) + +If set, the Intercept Layer for OpenCL Applications will only dump images when the specified kernel is enqueued. This control is ignored unless DumpImagesBeforeEnqueue or DumpImagesAfterEnqueue is enabled. + +##### `DumpBuffersMinEnqueue` (cl_uint) + +The Intercept Layer for OpenCL Applications will only dump buffers when the enqueue counter is greater than or equal to this value. + +##### `DumpBuffersMaxEnqueue` (cl_uint) + +The Intercept Layer for OpenCL Applications will only dump buffers when the enqueue counter is less than or equal to this value. + +##### `DumpImagesMinEnqueue` (cl_uint) + +The Intercept Layer for OpenCL Applications will only dump images when the enqueue counter is greater than or equal to this value. + +##### `DumpImagesMaxEnqueue` (cl_uint) + +The Intercept Layer for OpenCL Applications will only dump images when the enqueue counter is less than or equal to this value. + +### AubCapture Controls + +##### `AubCapture` (bool) + +This is the master control for aub capture. The Intercept Layer for OpenCL Applications doesn't implement aub capture itself, but can be used to selectively enable and disable aub capture via kdc.exe. 
+ +##### `AubCaptureIndividualEnqueues` (bool) + +If set, the Intercept Layer for OpenCL Applications will invoke kdc.exe to start aub capture before a kernel enqueue, and will also invoke kdc.exe to stop aub capture immediately after the kernel enqueue. Each .daf file will have the form "AubCapture\_Enqueue\_\\_kernel\_\.daf". Note that non-kernel enqueues such as calls to clEnqueueReadBuffer() and clEnqueueWriteBuffer() will NOT be aub captured when this control is set. The AubCaptureMinEnqueue and AubCaptureMaxEnqueue controls are still honored when AubCaptureIndividualEnqueues is set. + +##### `AubCaptureMinEnqueue` (cl_uint) + +The Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture when the enqueue counter is greater than or equal to this value. + +##### `AubCaptureMaxEnqueue` (cl_uint) + +The Intercept Layer for OpenCL Applications will invoke kdc.exe to stop aub capture when the enqueue counter is greater than this value, meaning that only enqueues less than or equal to this value will be captured. If the enqueue counter never reaches this value, the Intercept Layer for OpenCL Applications will stop aub capture when the DLL is unloaded. + +##### `AubCaptureKernelName` (string) + +If set, the Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture when the kernel name equals this name. + +##### `AubCaptureKernelGWS` (string) + +If set, the Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture when the NDRange global work size matches this string. The string should have the form "XxYxZ". The wildcard "*" matches all global work sizes. + +##### `AubCaptureKernelLWS` (string) + +If set, the Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture when the NDRange local work size matches this string. The string should have the form "XxYxZ". The wildcard "*" matches all local work sizes, and the string "NULL" matches a NULL local work size. 
+ +##### `AubCaptureUniqueKernels` (bool) + +If set, the Intercept Layer for OpenCL Applications will only invoke kdc.exe to enable aub capture if the kernel signature (i.e. hash + kernelname + gws + lws) has not been seen already. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues. + +##### `AubCaptureNumKernelEnqueuesSkip` (cl_uint) + +The Intercept Layer for OpenCL Applications will skip this many kernel enqueues before invoking kdc.exe to enable aub capture. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues. + +##### `AubCaptureNumKernelEnqueuesCapture` (cl_uint) + +The Intercept Layer for OpenCL Applications will only capture this many kernel enqueues. The behavior of this control is well-defined when AubCaptureIndividualEnqueues is not set, but it doesn't make much sense without AubCaptureIndividualEnqueues. + +##### `AubCaptureStartWait` (cl_uint) + +The Intercept Layer for OpenCL Applications will wait for this many milliseconds before invoking kdc.exe to begin aub capture. + +##### `AubCaptureEndWait` (cl_uint) + +The Intercept Layer for OpenCL Applications will wait for this many milliseconds before invoking kdc.exe to end aub capture. + +### Execution Controls + +##### `NoErrors` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will cause all OpenCL APIs to return a successful error status. + +##### `FinishAfterEnqueue` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFinish() after every enqueue. The command queue that the command was just enqueued to is passed to clFinish(). This can be used to debug possible timing or resource management issues and will likely impact performance. 
+ +##### `FlushAfterEnqueue` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFlush() after every enqueue. The command queue that the command was just enqueued to is passed to clFlush(). This can also be used to debug possible timing or resource management issues and is slightly less obtrusive than FinishAfterEnqueue but still will likely impact performance. If both FinishAfterEnqueue and FlushAfterEnqueue are nonzero then the Intercept Layer for OpenCL Applications will only insert a call to clFinish() after every enqueue, because clFinish() implies clFlush(). + +##### `FlushAfterEnqueueBarrier` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications inserts a call to clFlush() after every barrier enqueue. The command queue that the command was just enqueued to is passed to clFlush(). This has been useful to debug out-of-order queue issues. + +##### `InOrderQueue` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will force all queues to be created in-order. This can be used for performance analysis, but may lead to deadlocks in some cases. + +##### `NullEnqueue` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will silently ignore any enqueue. This can be used for performance analysis, but will likely cause errors if the application relies on any sort of information from OpenCL events and should be used carefully. + +##### `NullLocalWorkSize` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will force the local work size argument to clEnqueueNDRangeKernel() to be NULL, which causes the OpenCL implementation to pick the local work size. Note that this control takes effect before NullLocalWorkSizeX / NullLocalWorkSizeY / NullLocalWorkSizeZ (see below), so enabling both controls will have the effect of forcing a specific local work size. 
+ +##### `NullLocalWorkSizeX` (size_t) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect. + +##### `NullLocalWorkSizeY` (size_t) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect. + +##### `NullLocalWorkSizeZ` (size_t) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will set the local work size that will be used if an application passes NULL as the local work size to clEnqueueNDRangeKernel(). 1D dispatches will only look at NullLocalWorkSizeX, 2D dispatches will only look at NullLocalWorkSizeX and NullLocalWorkSizeY, while 3D dispatches will look at NullLocalWorkSizeX, NullLocalWorkSizeY, and NullLocalWorkSizeZ. If the specified values for NullLocalWorkSize do not evenly divide the global work size then the specified values of NullLocalWorkSize will not take effect. 
+ +##### `InitializeBuffers` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will initialize the contents of allocated buffers with zero. Only valid for non-COPY\_HOST\_PTR and non-USE\_HOST\_PTR allocations. + +### Platform and Device Query Overrides + +##### `PlatformName` (string) + +If set to a non-empty value, the clGetPlatformInfo() query for CL\_PLATFORM\_NAME will return this string instead of the true platform name. + +##### `PlatformVendor` (string) + +If set to a non-empty value, the clGetPlatformInfo() query for CL\_PLATFORM\_VENDOR will return this string instead of the true platform vendor. + +##### `PlatformProfile` (string) + +If set to a non-empty value, the clGetPlatformInfo() query for CL\_PLATFORM\_PROFILE will return this string instead of the true platform profile. + +##### `PlatformVersion` (string) + +If set to a non-empty string, the clGetPlatformInfo() query for CL\_PLATFORM\_VERSION will return this string instead of the true platform version. + +##### `DeviceTypeFilter` (cl_uint) + +Hides all device types that are not in the filter. Note: CL\_DEVICE\_TYPE\_CPU = 2, CL\_DEVICE\_TYPE\_GPU = 4, CL\_DEVICE\_TYPE\_ACCELERATOR = 8, CL\_DEVICE\_TYPE\_CUSTOM = 16. + +##### `DeviceType` (cl_uint) + +If set to a non-zero value, the clGetDeviceInfo() query for CL\_DEVICE\_TYPE will return this value instead of the true device type. In addition, calls to clGetDeviceIDs() for this device type will return all devices, not just devices of the requested type. This can be used to enumerate all devices (even CPUs) as GPUs, or vice versa. + +##### `DeviceName` (string) + +If set to a non-empty string, the clGetDeviceInfo() query for CL\_DEVICE\_NAME will return this value instead of the true device name. + +##### `DeviceVendor` (string) + +If set to a non-empty string, the clGetDeviceInfo() query for CL\_DEVICE\_VENDOR will return this value instead of the true device vendor. 
+ +##### `DeviceProfile` (string) + +If set to a non-empty string, the clGetDeviceInfo() query for CL\_DEVICE\_PROFILE will return this value instead of the true device profile. + +##### `DeviceVersion` (string) + +If set to a non-empty string, the clGetDeviceInfo() query for CL\_DEVICE\_VERSION will return this value instead of the true device version. + +##### `DeviceCVersion` (string) + +If set to a non-empty string, the clGetDeviceInfo() query for CL\_DEVICE\_OPENCL\_C\_VERSION will return this value instead of the true device OpenCL C version. + +##### `DeviceExtensions` (string) + +If set to a non-empty string, the clGetDeviceInfo() query for CL\_DEVICE\_EXTENSIONS will return this value instead of the true device extensions string. + +##### `DeviceVendorID` (cl_uint) + +If set to a non-zero value, the clGetDeviceInfo() query for CL\_DEVICE\_VENDOR\_ID will return this value instead of the true device vendor ID. + +##### `DeviceMaxComputeUnits` (cl_uint) + +If set to a non-zero value, the clGetDeviceInfo() query for CL\_DEVICE\_MAX\_COMPUTE\_UNITS will return this value instead of the true device max compute units. + +##### `DevicePreferredVectorWidthChar` (cl_uint) + +If set to a non-negative value, the clGetDeviceInfo() query for CL\_DEVICE\_PREFERRED\_VECTOR\_WIDTH\_CHAR will return this value instead of the true device preferred vector width. + +##### `DevicePreferredVectorWidthShort` (cl_uint) + +If set to a non-negative value, the clGetDeviceInfo() query for CL\_DEVICE\_PREFERRED\_VECTOR\_WIDTH\_SHORT will return this value instead of the true device preferred vector width. + +##### `DevicePreferredVectorWidthInt` (cl_uint) + +If set to a non-negative value, the clGetDeviceInfo() query for CL\_DEVICE\_PREFERRED\_VECTOR\_WIDTH\_INT will return this value instead of the true device preferred vector width. 
+ +##### `DevicePreferredVectorWidthLong` (cl_uint) + +If set to a non-negative value, the clGetDeviceInfo() query for CL\_DEVICE\_PREFERRED\_VECTOR\_WIDTH\_LONG will return this value instead of the true device preferred vector width. + +##### `DevicePreferredVectorWidthHalf` (cl_uint) + +If set to a non-negative value, the clGetDeviceInfo() query for CL\_DEVICE\_PREFERRED\_VECTOR\_WIDTH\_HALF will return this value instead of the true device preferred vector width. + +##### `DevicePreferredVectorWidthFloat` (cl_uint) + +If set to a non-negative value, the clGetDeviceInfo() query for CL\_DEVICE\_PREFERRED\_VECTOR\_WIDTH\_FLOAT will return this value instead of the true device preferred vector width. + +##### `DevicePreferredVectorWidthDouble` (cl_uint) + +If set to a non-negative value, the clGetDeviceInfo() query for CL\_DEVICE\_PREFERRED\_VECTOR\_WIDTH\_DOUBLE will return this value instead of the true device preferred vector width. + +### Precompiled Kernel and Builtin Kernel Override Controls + +##### `ForceByteBufferOverrides` (bool) + +If set to a nonzero value, each of the buffer functions that are overridden (via one or more of the keys below) will use a byte-wise operation to read/write/copy the buffer (default behavior is to try to copy multiple bytes at a time, if possible). Note: Requires OpenCL 1.1 or the "byte addressable store" extension. + +##### `OverrideReadBuffer` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueReadBuffer() instead of the implementation's clEnqueueReadBuffer(). Note: Requires OpenCL 1.1 or the "byte addressable store" extension. + +##### `OverrideWriteBuffer` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueWriteBuffer() instead of the implementation's clEnqueueWriteBuffer(). Note: Requires OpenCL 1.1 or the "byte addressable store" extension. 
+ +##### `OverrideCopyBuffer` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueCopyBuffer() instead of the implementation's clEnqueueCopyBuffer(). Note: Requires OpenCL 1.1 or the "byte addressable store" extension. + +##### `OverrideReadImage` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueReadImage() instead of the implementation's clEnqueueReadImage(). Only 2D images are currently supported. + +##### `OverrideWriteImage` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueWriteImage() instead of the implementation's clEnqueueWriteImage(). Only 2D images are currently supported. + +##### `OverrideCopyImage` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will use a kernel to implement clEnqueueCopyImage() instead of the implementation's clEnqueueCopyImage(). Only 2D images are currently supported. + +##### `OverrideBuiltinKernels` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will use its own version of the built-in OpenCL kernels that may be accessed via clCreateProgramWithBuiltInKernels(). At present, only the VME block\_motion\_estimate\_intel kernel is implemented. + +### SIMD Survey Controls + +##### `SIMDSurvey` (bool) + +Executes a SIMD survey state machine. The general idea of the SIMD survey state machine is to create and manage three additional kernels for each actual OpenCL kernel, one for each SIMD size. Then, execute and time the three kernels, and choose the fastest for subsequent executions. + +##### `SIMDSurveyWarmupIterations` (cl_uint) + +This is the number of NDRanges that the SIMD survey state machine ignores before starting to time the SIMD survey. 
+ +##### `SIMDSurveySIMD8Option` (string) + +This is the build option that is pre-pended to the application-specified build options to create the SIMD8 kernel. + +##### `SIMDSurveySIMD16Option` (string) + +This is the build option that is pre-pended to the application-specified build options to create the SIMD16 kernel. + +##### `SIMDSurveySIMD32Option` (string) + +This is the build option that is pre-pended to the application-specified build options to create the SIMD32 kernel. + +##### `SIMDOracle` (bool) + +[Note: Not currently implemented, but the idea behind the SIMD oracle is to save the best SIMD size from run-to-run, so the full SIMD survey does not need to be re-executed.] + + +--- + +\* Other names and brands may be claimed as the property of others. + +Copyright (c) 2018, Intel(R) Corporation diff --git a/docs/images/chrome_tracing_detail.png b/docs/images/chrome_tracing_detail.png new file mode 100644 index 00000000..abe7e144 Binary files /dev/null and b/docs/images/chrome_tracing_detail.png differ diff --git a/docs/images/chrome_tracing_empty.png b/docs/images/chrome_tracing_empty.png new file mode 100644 index 00000000..ee69ca88 Binary files /dev/null and b/docs/images/chrome_tracing_empty.png differ diff --git a/docs/images/chrome_tracing_example.png b/docs/images/chrome_tracing_example.png new file mode 100644 index 00000000..f32bb161 Binary files /dev/null and b/docs/images/chrome_tracing_example.png differ diff --git a/docs/images/cmake_itt.png b/docs/images/cmake_itt.png new file mode 100644 index 00000000..2149bfd6 Binary files /dev/null and b/docs/images/cmake_itt.png differ diff --git a/docs/images/vtune_config.png b/docs/images/vtune_config.png new file mode 100644 index 00000000..bacf23a8 Binary files /dev/null and b/docs/images/vtune_config.png differ diff --git a/docs/images/vtune_device_timing.png b/docs/images/vtune_device_timing.png new file mode 100644 index 00000000..3d32602b Binary files /dev/null and 
b/docs/images/vtune_device_timing.png differ diff --git a/docs/images/vtune_output.png b/docs/images/vtune_output.png new file mode 100644 index 00000000..adc75776 Binary files /dev/null and b/docs/images/vtune_output.png differ diff --git a/docs/injecting_programs.md b/docs/injecting_programs.md new file mode 100644 index 00000000..8f23818e --- /dev/null +++ b/docs/injecting_programs.md @@ -0,0 +1,95 @@ +# Using the Intercept Layer for OpenCL Applications to Inject Programs + +Program Injection allows OpenCL programs or program options to be selectively +modified for debugging or performance analysis without application knowledge. +The process for injecting programs is not particularly complicated but requires +several steps. + +## Step 1: Dump Programs and Program Options for Injection + +Run your application with DumpProgramSource enabled. This dumps all programs to +the directory + + %SYSTEMDRIVE%\Intel\CLIntercept_Dump\ (Windows) + +or: + + ~/CLIntercept_Dump/ (Linux) + +This directory will also be used for program injection. + +## Step 2: Find the Program or Program Options to Modify + +Look through the dump directory to find the program(s) or program options you'd +like to modify. Note that a program options file may not exist for every program +source file if the application did not provide options to clBuildProgram(). If +this is the case, simply create a new program options file. + +## Step 3: Copy the Program or Program Options to Modify + +Copy the program(s) or program options you'd like to modify to the directory: + + %SYSTEMDRIVE%\Intel\CLIntercept_Dump\\Inject (Windows) + +or: + + ~/CLIntercept_Dump//Inject (Linux) + +This is the directory that is searched when looking for programs to inject. Note +that this is a subdirectory of the dump directory. + +If the application compiles programs deterministically the program(s) or program +options can be copied unchanged. 
If the application compiles programs +non-deterministically, you may need to rename the programs you want to modify to remove the +program number or compile count from the filename. + +The Intercept Layer for OpenCL Applications searches for program source filenames +to inject in this order: + +* `CLI_<program number>_<hash>_source.cl` - This is the default filename dumped + by DumpProgramSource. +* `CLI_<hash>_source.cl` - This is the default filename with the program number + removed, so the order the application calls clCreateProgramWithSource() does + not matter. + +The Intercept Layer for OpenCL Applications searches for program option filenames +to inject in this order: + +* `CLI_<program number>_<hash>_<compile count>_options.txt` - This is the default filename + dumped by DumpProgramSource. +* `CLI_<hash>_<compile count>_options.txt` - This is the default filename with the program + number removed, so the order the application calls clCreateProgramWithSource() + does not matter. +* `CLI_<hash>_options.txt` - This has both the program number and compile count + removed, so it will apply the same options every time the program is built. +* `CLI_options.txt` - This injects the same options globally, for all programs, + unless one of the program-specific filenames exists. + +## Step 4: Modify the Program + +Modify the program as desired. Ideas: Change the program source to a more optimal +code sequence. Switch conformant built-ins to native built-ins. Add program +attributes, e.g. for required work group size. + +## Step 5: Set the InjectProgramSource Registry Key and Go! + +If all goes well you will see a line similar to + + Injecting source file: + +or: + + Injecting options file: + +in your log. + +## Notes: + +* The instructions above describe how to dump and inject program source, but you + can also dump and inject program binaries or SPIR-V intermediate representation. + +--- + +\* Other names and brands may be claimed as the property of others. 
+ +Copyright (c) 2018, Intel(R) Corporation diff --git a/docs/install.md b/docs/install.md new file mode 100644 index 00000000..5022aeef --- /dev/null +++ b/docs/install.md @@ -0,0 +1,129 @@ +# How to Install the Intercept Layer for OpenCL Applications + +There are multiple ways to install the Intercept Layer for OpenCL Applications: + +## Windows + +### Local Install + +The easiest (and least obtrusive!) way to install the Intercept Layer for +OpenCL Applications is to: + +1. Put the Intercept Layer for OpenCL Applications OpenCL.dll into your + application's working directory, typically the directory with the + application executable. Since DLLs are often loaded from the current + working directory before other directories in the system path, the + Intercept Layer for OpenCL Applications OpenCL.dll will be loaded + instead of the real OpenCL.dll. +2. To uninstall, simply delete the Intercept Layer for OpenCL Applications + OpenCL.dll from the application's working directory. + +### Global Install + +To install the Intercept Layer for OpenCL Applications globally (for all +OpenCL applications): + +1. Rename your existing OpenCL.dll (typically in c:\windows\system32 for + 32-bit systems or 64-bit DLLs on 64-bit systems, or c:\windows\syswow64 + for 32-bit DLLs on 64-bit systems). + * You may need to rename your existing OpenCL.dll from safe mode, or + from a command prompt with administrative privileges. + * If you rename your existing DLL to real_OpenCL.dll then the renamed + DLL will be automatically loaded by the Intercept Layer for OpenCL + Applications, otherwise you'll need to tell the Intercept Layer for + OpenCL Applications what your real DLL name is. See below. +2. After renaming your real OpenCL.dll, copy the Intercept Layer for + OpenCL Applications version of OpenCL.dll in its place. +3. 
To uninstall the Intercept Layer for OpenCL Applications using this + method, reverse the steps: First, delete the Intercept Layer for OpenCL + Applications version of OpenCL.dll, then rename the real OpenCL.dll + back to OpenCL.dll. + +This method also works for applications that load OpenCL.dll from an explicit path. + +## Linux + +### Global Install + +1. Find the location of the real icd loader library (libOpenCL.so): + + sudo find . -name 'libOpenCL*' + + To find the libraries and follow symbolic links in one go use: + + sudo find . -name 'libOpenCL*' | while read -r line; do ll "$line"; done + +2. Rename the real icd loader library: + + sudo mv /path/to/lib/libOpenCL.so.1.2 /path/to/lib/real_libOpenCL.so.1.2 + +3. Create a symbolic link from real icd loader library to the Intercept Layer for OpenCL Applications library: + + sudo ln -s /path/to/CLIBin/builds/x86_64/libOpenCL.so.1 /path/to/lib/libOpenCL.so.1.2 + +4. Create a config file to control the Intercept Layer for OpenCL Applications. + Behavior is controlled via a config file in the user's home folder (~). To + change the behavior create/edit the configuration file and set the value + for the desired options. Refer to the list below for the available options. + To create the config file or open it for edit: + + gedit ~/clintercept.conf + + Sample content: + + DllName=/path/to/lib/real_libOpenCL.so.1.2 + LogToFile=1 // Enable LogToFile feature + CallLogging=1 // Enable CallLogging feature + +5. Run an OpenCL application, and output will be in ~/CLIntercept_Dump/AppName + +### Targeted Usage + +To intercept many Linux OpenCL applications, instrumentation can be performed +using only environment variables. If the application specifies an rpath or +otherwise circumvents the OS's method of identifying an appropriate +libOpenCL.so, this method won't work.
Example: + + LD_LIBRARY_PATH=/path/to/clintercept/build/output CLI_DLLName=/opt/intel/opencl/libOpenCL.so \ + CLI_DumpProgramSource=1 ./oclapplication + +## Mac OSX - Experimental + +The Intercept Layer for OpenCL Applications on OSX uses an OS capability called +"interposition" to intercept OpenCL calls. As such, there is no "global +install" for OSX. To use the Intercept Layer for OpenCL Applications on OSX, +run your application with the environment variable DYLD_INSERT_LIBRARIES set +to the full path to the CLIntercept library. For example: + + OSX Command Prompt$ DYLD_INSERT_LIBRARIES=/full/path/to/clIntercept/OpenCL ./HelloWorld + +## Android - Experimental + +Only global install was tested + +1. on target: + + cd /system/vendor/lib + mv libOpenCL.so real_libOpenCL.so + +2. on host: + + adb push clIntercept.so /system/vendor/lib/libOpenCL.so + +3. configuration file will be in $HOME/clintercept.conf. If $HOME variable is + undefined (GUI application) it is in /sdcard/clintercept.conf. + + Sample config: + + LogToFile=1 + CallLogging=0 + HostPerformanceTiming=1 + DevicePerformanceTiming=1 + HostPerformanceTimeLogging=1 + DevicePerformanceTimeLogging=1 + +--- + +\* Other names and brands may be claimed as the property of others. + +Copyright (c) 2018, Intel(R) Corporation diff --git a/docs/vtune_logging.md b/docs/vtune_logging.md new file mode 100644 index 00000000..17171f1b --- /dev/null +++ b/docs/vtune_logging.md @@ -0,0 +1,77 @@ +# Using the Intercept Layer for OpenCL Applications with Intel(R) VTune(tm) Amplifier XE + +The Intercept Layer for OpenCL Applications can be built with support for the +Intel(R) VTune(tm) [Instrumentation and Tracing Technology (ITT)][itt] APIs, which +can add information about OpenCL-related calls to VTune timegraphs. 
This document +describes how to build the Intercept Layer for OpenCL Applications with ITT support, +and how to set controls and VTune to display OpenCL events captured by the +Intercept Layer for OpenCL Applications. + +## Why Use the Intercept Layer for OpenCL Applications? + +Recent versions of VTune support tracing OpenCL programs natively; however, you still +may want to use CLIntercept's VTune integration: + +* The VTune OpenCL tracing works for Intel(R) Processor Graphics only, however the + Intercept Layer for OpenCL Applications's OpenCL tracing works for all OpenCL + devices, including non-Intel OpenCL devices. +* The VTune OpenCL tracing works for a subset of OpenCL calls, however the Intercept + Layer for OpenCL Applications's OpenCL tracing works for all OpenCL calls, including + some non-API calls such as event callbacks. +* The Intercept Layer for OpenCL Applications's OpenCL tracing is very lightweight. + +## Building the Intercept Layer for OpenCL Applications with ITT Support + +To build the Intercept Layer for OpenCL Applications with ITT support, +be sure the "ENABLE_ITT" box is checked when configuring CMake: + +![CMake GUI ITT](images/cmake_itt.png) + +You'll also want to be sure that the VTUNE_INCLUDE_DIR and VTUNE_ITTNOTIFY_LIB +variables are detected correctly. They may be set up automatically, but for systems with +non-standard install paths they may need to be set up manually. + +So long as this box is checked and the include and lib variables are set up correctly, +the Intercept Layer for OpenCL Applications should build without errors or warnings and +include ITT support. + +## Configuring the Intercept Layer for OpenCL Applications for VTune + +The main control to configure the Intercept Layer for OpenCL Applications for VTune +is `ITTCallLogging`. This adds API call entry and exit information to VTune timegraphs.
+ +## Configuring VTune for the Intercept Layer for OpenCL Applications + +To configure VTune for the Intercept Layer for OpenCL Applications, check the box for +'Analyze user tasks, events, and counters', since the Intercept Layer for OpenCL +Applications's ITT call logging is considered a "user event". + +You do not need to check the box for 'Trace OpenCL and Media SDK programs (Intel Graphics +only)'. This enables the native VTune support for OpenCL tracing. If this box is checked +you'll likely see some OpenCL calls twice on the timegraph, since both the Intercept Layer +for OpenCL Applications and native VTune tracing will be enabled. + +![VTune Config](images/vtune_config.png) + +## Example VTune Output with the Intercept Layer for OpenCL Applications + +Here is example output with ITTCallLogging. Note that all API calls are logged, including +an event callback on a separate thread: + +![VTune Output](images/vtune_output.png) + +## Limitations and Restrictions + +The biggest limitation at the moment is that you don't get any "device timing" information +in VTune; you only get information about host API calls. This means that you won't see +output like the picture below that you'd get with the native VTune tracing: + +![VTune Device Timing](images/vtune_device_timing.png) + +--- + +\* Other names and brands may be claimed as the property of others. + +Copyright (c) 2018, Intel(R) Corporation + +[itt]: https://software.intel.com/en-us/node/544195 diff --git a/resource/clIntercept.rc b/resource/clIntercept.rc new file mode 100644 index 00000000..8a35445a Binary files /dev/null and b/resource/clIntercept.rc differ diff --git a/resource/clIntercept_resource.h b/resource/clIntercept_resource.h new file mode 100644 index 00000000..ee901f48 --- /dev/null +++ b/resource/clIntercept_resource.h @@ -0,0 +1,17 @@ +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file.
+// Used by clIntercept.rc +// +#define IDR_TEXT_PRECOMPILED_KERNELS 101 +#define IDR_TEXT_BUILTIN_KERNELS 102 + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 103 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1001 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif diff --git a/scripts/generate_controls_doc.py b/scripts/generate_controls_doc.py new file mode 100644 index 00000000..cf10bf05 --- /dev/null +++ b/scripts/generate_controls_doc.py @@ -0,0 +1,210 @@ +# Copyright (c) 2018 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import sys +import os +import re + +# File Header: +def GetHeader(): + # Raw string: the registry and DLL paths below contain literal backslashes. + return r"""# How to Use the Intercept Layer for OpenCL Applications + +This file is automatically generated using the script `generate_controls_doc.py`. +Please do not edit it manually!
+ +By default, the Intercept Layer for OpenCL Applications will not modify any OpenCL +calls. You may notice some status messages being printed to stderr, but otherwise +your application should run exactly as it does without the Intercept Layer for +OpenCL Applications. + +## Controls + +The Intercept Layer for OpenCL Applications is controlled using the Windows +registry, Linux configuration files, or environment variables on all OSes. + +### Windows Registry + +On Windows, the Intercept Layer for OpenCL Applications reads its registry keys +from: + + HKEY_CURRENT_USER\SOFTWARE\INTEL\IGFX\CLIntercept + +This is the recommended registry location as it has several advantages over +HKEY_LOCAL_MACHINE: modifying the registry keys does not require Administrator +access, registry keys do not need to be set in multiple places, and each user +can set their own registry keys without affecting other users. + +For backwards compatibility, the Intercept Layer for OpenCL applications +will still read registry keys from: + + // For 32-bit systems, or 64-bit applications on a 64-bit system: + HKEY_LOCAL_MACHINE\SOFTWARE\INTEL\IGFX\CLIntercept + + // For 32-bit applications on a 64-bit system: + HKEY_LOCAL_MACHINE\SOFTWARE\WoW6432Node\INTEL\IGFX\CLIntercept + +If a registry is set in both HKCU and HKLM, the setting in HKCU will "win". + +### Linux Configuration Files + +On Linux, the Intercept Layer for OpenCL Applications will read control values +from ~/clintercept.conf. Controls may be set by putting the control on its own +line, followed by an equals sign, followed by the value you'd like to set the +option to. Lines that begin with a semi-colon(";"), a hash mark ("#"), or a +C++-style comment ("//") are ignored. For example, to enable CallLogging, put +a line in your ~/clintercept.conf that looks like: + + // Enable CallLogging: + CallLogging=1 + +### Environment Variables + +The Intercept Layer for OpenCL may be controlled using environment variables.
+The name of the environment variable control is "CLI_" and the control name, to +distinguish controls from other environment variables, and to make it easy to +list all of the environment variable controls. So, to enable CallLogging, you +could type: + + export CLI_CallLogging=1 + +To disable CallLogging, you could type: + + unset CLI_CallLogging + +To list all environment variable controls, you could type: + + env | grep CLI_ + +### Setup and Loading Controls + +##### `DllName` (string) + +Used to control the DLL or Shared Library that the Intercept Layer for OpenCL Applications loads to make real OpenCL calls. If present, only this DLL name is loaded. If omitted, the Intercept Layer for OpenCL Applications tries to load the real OpenCL DLL from file names in this order: + +- real_OpenCL.dll (anywhere in the system path) +- %WINDIR%\SysWOW64\OpenCL.dll (32-bit DLLs only) +- %WINDIR%\System32\OpenCL.dll + +##### `BreakOnLoad` (bool) + +If set to a nonzero value, the Intercept Layer for OpenCL Applications will break into the debugger when it is loaded. + +""" + +# File Footer: +def GetFooter(): + return r""" +--- + +\* Other names and brands may be claimed as the property of others. + +Copyright (c) 2018, Intel(R) Corporation +""" + +printHelp = False + +if len(sys.argv) < 3: + nameControlsDoc = '../docs/controls.md' +else: + nameControlsDoc = sys.argv[2] + +if len(sys.argv) < 2: + nameControlsSrc = '../Src/controls.h' +else: + nameControlsSrc = sys.argv[1] + +if ( len(sys.argv) == 2 ) and ( sys.argv[1] == '-h' or sys.argv[1] == '-?'
): + printHelp = True + +if printHelp: + print('usage: generate_controls_doc.py {optional: nameControlsSrc, default: ../Src/controls.h} {optional: nameControlsDoc, default: ../docs/controls.md}') +elif not os.path.exists(nameControlsSrc): + print('error: controls source file ' + nameControlsSrc + ' does not exist!') +else: + print('Generating from source: ' + nameControlsSrc) + + # Plain 'r': Python 3 text mode already normalizes newlines, and the 'U' flag was removed in Python 3.11. + srcFile = open(nameControlsSrc, 'r') + docFile = open(nameControlsDoc, 'w') + + docFile.write( GetHeader() ) + + numberOfSeparators = 0 + numberOfControls = 0 + + for line in srcFile: + # Raw strings so \( and \) reach the regex engine without invalid-escape warnings. + separator = re.search(r"^CLI_CONTROL_SEPARATOR\((.*)\)$", line) + if separator: + full = separator.group(1).strip() + + # Remove the last character of the string (":"): + full = full[:-1]; + + docFile.write('### ' + full + '\n\n') + + #print('Got separator: ' + full) + + numberOfSeparators = numberOfSeparators + 1 + continue + + control = re.search(r"^CLI_CONTROL\((.*)\)$", line) + if control: + full = control.group(1).strip() + + data = full.split(',', 3) + type = data[0].strip() + name = data[1].strip() + default = data[2].strip() + description = data[3].strip() + + # Remove std:: from the type: + type = type.replace("std::", "") + + # Remove the first and last characters of the description: + description = description[1:-1]; + # Replace escaped quotes with real quotes: + description = description.replace("\\\"", "\"") + # Replace underscores with escaped underscores: + description = description.replace("_", "\\_") + # Escape angle brackets: + description = description.replace("<", "\\<") + description = description.replace(">", "\\>") + + docFile.write('##### `' + name + '` (' + type + ')\n\n') + docFile.write(description + '\n\n') + + #print('Got control: ' + name) + #print(' description: ' + description) + + numberOfControls = numberOfControls + 1 + continue + + #print('Not sure what to do with: ' + line) + + docFile.write( GetFooter() ) + + srcFile.close() + docFile.close() + + print('Successfully
generated file: ' + nameControlsDoc) + print('Found ' + str(numberOfSeparators) + ' control separators.') + print('Found ' + str(numberOfControls) + ' controls.')