From b04926956a17284aad7b66872c04e86520b4d074 Mon Sep 17 00:00:00 2001 From: MoFtZ Date: Sun, 14 Jan 2024 23:24:36 +1100 Subject: [PATCH] Added detection support for pre-generated LibDevice PTX. --- .gitignore | 1 + Src/ILGPU/Backends/PTX/PTXBackend.cs | 28 +++++++++- Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt | 37 +++++++++++-- .../RuntimeErrorMessages.Designer.cs | 18 ++++++ Src/ILGPU/Resources/RuntimeErrorMessages.resx | 10 +++- Src/ILGPU/Static/CudaLibDevicePtx.xml | 2 +- .../CudaDriverVersionUtils.tt | 55 +++++++++++++++++++ .../CudaGenerateLibDeviceTool.csproj | 35 ++++++++++++ .../MinimalCudaAPI.cs | 50 +++++++++++++++++ Tools/CudaGenerateLibDeviceTool/Program.cs | 10 +++- 10 files changed, 235 insertions(+), 11 deletions(-) create mode 100644 Tools/CudaGenerateLibDeviceTool/CudaDriverVersionUtils.tt create mode 100644 Tools/CudaGenerateLibDeviceTool/MinimalCudaAPI.cs diff --git a/.gitignore b/.gitignore index 3f1e78435f..f48cad5930 100644 --- a/.gitignore +++ b/.gitignore @@ -286,6 +286,7 @@ Src/ILGPU/Static/DllImports.cs Src/ILGPU/StrideTypes.cs Src/ILGPU/Util/DataBlocks.cs Src/ILGPU/Util/PrimitiveDataBlocks.cs +Tools/CudaGenerateLibDeviceTool/CudaDriverVersionUtils.cs # Ignore specific template outputs (Algorithms) Src/ILGPU.Algorithms/AlgorithmContextMappings.cs diff --git a/Src/ILGPU/Backends/PTX/PTXBackend.cs b/Src/ILGPU/Backends/PTX/PTXBackend.cs index c8c9880000..465db50dbe 100644 --- a/Src/ILGPU/Backends/PTX/PTXBackend.cs +++ b/Src/ILGPU/Backends/PTX/PTXBackend.cs @@ -14,6 +14,7 @@ using ILGPU.IR; using ILGPU.IR.Analyses; using ILGPU.IR.Transformations; +using ILGPU.Resources; using ILGPU.Runtime; using ILGPU.Runtime.Cuda; using ILGPU.Util; @@ -21,6 +22,7 @@ using System.Collections; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; +using System.Linq; namespace ILGPU.Backends.PTX { @@ -405,8 +407,10 @@ private void GenerateLibDeviceCode( StringComparison.Ordinal); builder.KernelBuilder.Append(compiledString); } - else + else 
if (Architecture >= PTXLibDevicePtx.MinArchtecture && + InstructionSet >= PTXLibDevicePtx.MinInstructionSet) { + // If supported, use the pre-generated LibDevice PTX code. var ptxModules = InlineList.Create(backendContext.Count); PTXLibDevicePtx.GetPtx( enumerator.AsEnumerable(), @@ -416,6 +420,28 @@ private void GenerateLibDeviceCode( builder.AddModule(ptxModules.AsReadOnlySpan()); builder.KernelBuilder.AppendLine(ptxDeclarations); } + else if (enumerator.AsEnumerable().FirstOrDefault() != null) + { + // Handle any issues if a LibDevice function is used. + if (Architecture >= PTXLibDevicePtx.MinArchtecture) + { + // The architecture is supported, but is using an older instruction + // set. Can be solved by a driver update. + var minDriverVersion = CudaDriverVersionUtils.GetMinimumDriverVersion( + InstructionSet); + throw new NotSupportedException(string.Format( + RuntimeErrorMessages.NotSupportedLibDevicePreGeneratedNewer, + minDriverVersion.Major, + minDriverVersion.Minor)); + } + else + { + // The architecture is too old for the pre-generated LibDevice PTX. + // Inform the user to manually initialize LibDevice. 
+ throw new NotSupportedException( + RuntimeErrorMessages.NotSupportedLibDeviceNotInitialized); + } + } } #endregion diff --git a/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt b/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt index eccffabca1..c13c507f47 100644 --- a/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt +++ b/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt @@ -20,9 +20,10 @@ <#@ output extension=".cs" #> <# string rootPath = Host.ResolvePath("."); -var functions = LibDevicePtxFunctions.Load(rootPath, "../../Static/CudaLibDevicePtx.xml"); +var lib = LibDevicePtxFunctions.Load(rootPath, "../../Static/CudaLibDevicePtx.xml"); #> +using ILGPU.Runtime.Cuda; using ILGPU.Util; using System.Collections.Generic; using System.Text; @@ -36,10 +37,22 @@ namespace ILGPU.Backends.PTX /// internal static class PTXLibDevicePtx { + /// + /// Minimum architecture required to use the pre-generated PTX. + /// + public static CudaArchitecture MinArchtecture { get; } = + new CudaArchitecture(<#= lib.MinArchitectureMajor #>, <#= lib.MinArchitectureMinor #>); + + /// + /// Minimum instruction set required to use the pre-generated PTX. 
+ /// + public static CudaInstructionSet MinInstructionSet { get; } = + new CudaInstructionSet(<#= lib.MinInstructionSetMajor #>, <#= lib.MinInstructionSetMinor #>); + #region Generated PTX <# - foreach (var func in functions) + foreach (var func in lib.Functions) { PushIndent(2); WriteLibDeviceFunctionPtx(func); @@ -54,7 +67,7 @@ namespace ILGPU.Backends.PTX { <# PushIndent(4); - foreach (var func in functions) + foreach (var func in lib.Functions) WriteLine($"{{ \"{func.Name}\", {func.Name} }},"); PopIndent(); #> @@ -65,7 +78,7 @@ namespace ILGPU.Backends.PTX { <# PushIndent(4); - foreach (var func in functions) + foreach (var func in lib.Functions) WriteLine($"{{ \"{func.Name}\", {func.Name}{DeclarationSuffix} }},"); PopIndent(); #> @@ -135,8 +148,20 @@ void WriteLibDeviceFunctionPtx(LibDevicePtxFunction func) [XmlRoot("LibDevicePtx")] public class LibDevicePtxFunctions { - internal static LibDevicePtxFunction[] Load(string rootPath, string fileName) => - XmlHelper.Load(rootPath, fileName).Functions; + internal static LibDevicePtxFunctions Load(string rootPath, string fileName) => + XmlHelper.Load(rootPath, fileName); + + [XmlAttribute] + public int MinArchitectureMajor { get; set; } + + [XmlAttribute] + public int MinArchitectureMinor { get; set; } + + [XmlAttribute] + public int MinInstructionSetMajor { get; set; } + + [XmlAttribute] + public int MinInstructionSetMinor { get; set; } [XmlElement("Function")] public LibDevicePtxFunction[] Functions { get; set; } diff --git a/Src/ILGPU/Resources/RuntimeErrorMessages.Designer.cs b/Src/ILGPU/Resources/RuntimeErrorMessages.Designer.cs index 7c6931ffb4..4f5129acea 100644 --- a/Src/ILGPU/Resources/RuntimeErrorMessages.Designer.cs +++ b/Src/ILGPU/Resources/RuntimeErrorMessages.Designer.cs @@ -327,6 +327,24 @@ internal static string NotSupportedLibDeviceNotFoundNvvmDll { } } + /// + /// Looks up a localized string similar to Cannot find LibDevice implementation. Ensure that LibDevice is enabled from the ContextBuilder.. 
+ /// + internal static string NotSupportedLibDeviceNotInitialized { + get { + return ResourceManager.GetString("NotSupportedLibDeviceNotInitialized", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Cannot find LibDevice implementation. Upgrade to Cuda driver >= v{0}.{1} or override LibDevice from the ContextBuilder.. + /// + internal static string NotSupportedLibDevicePreGeneratedNewer { + get { + return ResourceManager.GetString("NotSupportedLibDevicePreGeneratedNewer", resourceCulture); + } + } + /// /// Looks up a localized string similar to Type '{0}' is not blittable. /// diff --git a/Src/ILGPU/Resources/RuntimeErrorMessages.resx b/Src/ILGPU/Resources/RuntimeErrorMessages.resx index d2b8d88192..698b04d9ea 100644 --- a/Src/ILGPU/Resources/RuntimeErrorMessages.resx +++ b/Src/ILGPU/Resources/RuntimeErrorMessages.resx @@ -241,9 +241,15 @@ Unknown parent accelerator - Velocity accelerator requires 64-bit application ({0} not supported). Ensure Prefer32Bit is set to 'false' + Velocity accelerator requires 64-bit application ({0} not supported). Ensure Prefer32Bit is set to 'false' - The Velocity accelerator supports little-endian machines only + The Velocity accelerator supports little-endian machines only + + + Cannot find LibDevice implementation. Ensure that LibDevice is enabled from the ContextBuilder. + + + Cannot find LibDevice implementation. Upgrade to Cuda driver >= v{0}.{1} or override LibDevice from the ContextBuilder. 
\ No newline at end of file diff --git a/Src/ILGPU/Static/CudaLibDevicePtx.xml b/Src/ILGPU/Static/CudaLibDevicePtx.xml index 7d6e5eda03..ac2cf48428 100644 --- a/Src/ILGPU/Static/CudaLibDevicePtx.xml +++ b/Src/ILGPU/Static/CudaLibDevicePtx.xml @@ -1,5 +1,5 @@  - + diff --git a/Tools/CudaGenerateLibDeviceTool/CudaDriverVersionUtils.tt b/Tools/CudaGenerateLibDeviceTool/CudaDriverVersionUtils.tt new file mode 100644 index 0000000000..89b028e361 --- /dev/null +++ b/Tools/CudaGenerateLibDeviceTool/CudaDriverVersionUtils.tt @@ -0,0 +1,55 @@ +// --------------------------------------------------------------------------------------- +// ILGPU +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: CudaDriverVersionUtils.tt/CudaDriverVersionUtils.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +<#@ template debug="false" hostspecific="true" language="C#" #> +<#@ include file="../../Src/ILGPU/Static/TypeInformation.ttinclude" #> +<#@ include file="../../Src/ILGPU/Static/CudaVersions.ttinclude" #> +<#@ assembly name="System.Core" #> +<#@ import namespace="System.Linq" #> +<#@ import namespace="System.Text" #> +<#@ import namespace="System.Collections.Generic" #> +<#@ output extension=".cs" #> +<# +string rootPath = Host.ResolvePath("../../Src/ILGPU/Static"); +var versions = CudaVersions.Load(rootPath, "CudaVersions.xml"); + +var instructionSets = + versions + .GroupBy(x => x.DriverVersion) + .OrderBy(x => x.Key) + .Select(g => (g.Key, g.Min(x => x.InstructionSetVersion))) + .ToArray(); + +#> + +namespace ILGPU.Runtime.Cuda +{ + internal class CudaDriverVersionUtils + { + /// + /// Maps Cuda driver version to the corresponding PTX ISA. 
+ /// + internal static readonly Dictionary< + int, + CudaInstructionSet> DriverVersionLookup = + new Dictionary + { +<# foreach (var instructionSet in instructionSets) { #> +<# var drv = instructionSet.Item1; #> +<# var isa = instructionSet.Item2; #> + { + <#= drv.Major * 1000 + drv.Minor * 10 #>, + CudaInstructionSet.ISA_<#= isa.Major #><#= isa.Minor #> + }, +<# } #> + }; + } +} \ No newline at end of file diff --git a/Tools/CudaGenerateLibDeviceTool/CudaGenerateLibDeviceTool.csproj b/Tools/CudaGenerateLibDeviceTool/CudaGenerateLibDeviceTool.csproj index c2417a6a04..0babb143cf 100644 --- a/Tools/CudaGenerateLibDeviceTool/CudaGenerateLibDeviceTool.csproj +++ b/Tools/CudaGenerateLibDeviceTool/CudaGenerateLibDeviceTool.csproj @@ -22,6 +22,12 @@ True CudaArchitecture.Generated.tt + + + True + True + CudaInstructionSet.Generated.tt + @@ -37,10 +43,39 @@ TextTemplatingFileGenerator CudaArchitecture.Generated.cs + + TextTemplatingFileGenerator + CudaInstructionSet.Generated.cs + + + True + True + CudaDriverVersionUtils.tt + + + + True + True + CudaDriverVersionUtils.tt + + + True + True + CudaInstrunctionSetUtils.tt + + + + + + TextTemplatingFileGenerator + CudaDriverVersionUtils.cs + + + diff --git a/Tools/CudaGenerateLibDeviceTool/MinimalCudaAPI.cs b/Tools/CudaGenerateLibDeviceTool/MinimalCudaAPI.cs new file mode 100644 index 0000000000..68f8035197 --- /dev/null +++ b/Tools/CudaGenerateLibDeviceTool/MinimalCudaAPI.cs @@ -0,0 +1,50 @@ +// --------------------------------------------------------------------------------------- +// ILGPU +// Copyright (c) 2024 ILGPU Project +// www.ilgpu.net +// +// File: MinimalCudaAPI.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. 
+// --------------------------------------------------------------------------------------- + +using System.Runtime.InteropServices; + +namespace CudaGenerateLibDeviceTool +{ + /// + /// Minimal Cuda API binding to allow detecting the current Cuda driver version. + /// + internal static class MinimalCudaAPI + { + delegate int CudaInit(int flags); + delegate int CudaDriverGetVersion(out int driverVersion); + + public static int GetCudaDriverVersion() + { + var cudaLibName = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) + ? "nvcuda" + : "cuda"; + var cudaAPI = NativeLibrary.Load(cudaLibName); + try + { + var cuInit = + Marshal.GetDelegateForFunctionPointer( + NativeLibrary.GetExport(cudaAPI, "cuInit")); + var cuDriverGetVersion = + Marshal.GetDelegateForFunctionPointer( + NativeLibrary.GetExport(cudaAPI, "cuDriverGetVersion")); + + if (cuInit(0) == 0 && cuDriverGetVersion(out int driverVersion) == 0) + return driverVersion; + } + finally + { + NativeLibrary.Free(cudaAPI); + } + + throw new NotImplementedException(); + } + } +} diff --git a/Tools/CudaGenerateLibDeviceTool/Program.cs b/Tools/CudaGenerateLibDeviceTool/Program.cs index 3df0021a8b..3ce4a42a2e 100644 --- a/Tools/CudaGenerateLibDeviceTool/Program.cs +++ b/Tools/CudaGenerateLibDeviceTool/Program.cs @@ -35,6 +35,10 @@ static void Main(string[] args) // Generate the PTX for each of the LibDevice methods. 
var filePath = Path.Combine(GetDefaultFolder(), "CudaLibDevicePtx.xml"); + var minArchitecture = CudaArchitecture.SM_60; + var driverVersion = MinimalCudaAPI.GetCudaDriverVersion(); + var instructionSet = CudaDriverVersionUtils.DriverVersionLookup[driverVersion]; + using var doc = XmlWriter.Create( filePath, new XmlWriterSettings @@ -43,6 +47,10 @@ static void Main(string[] args) Encoding = Encoding.UTF8 }); doc.WriteStartElement("LibDevicePtx"); + doc.WriteAttributeString("MinArchitectureMajor", $"{minArchitecture.Major}"); + doc.WriteAttributeString("MinArchitectureMinor", $"{minArchitecture.Minor}"); + doc.WriteAttributeString("MinInstructionSetMajor", $"{instructionSet.Major}"); + doc.WriteAttributeString("MinInstructionSetMinor", $"{instructionSet.Minor}"); var methods = LoadMethodNames(); @@ -50,7 +58,7 @@ static void Main(string[] args) { PTXLibDevice.GenerateLibDeviceCode( nvvmAPI, - CudaArchitecture.SM_60, + minArchitecture, new[] { method }, out var ptx);