From 8afc5d75d6a851b6586c3a2202d61ad7987019f0 Mon Sep 17 00:00:00 2001 From: MoFtZ Date: Fri, 12 Jan 2024 14:50:28 +1100 Subject: [PATCH] Implemented support for pre-generated LibDevice PTX modules. --- .gitignore | 1 + Samples/LibDeviceKernel/Program.cs | 6 +- Src/ILGPU/Backends/PTX/PTXAssembly.cs | 104 ++++++++++++ Src/ILGPU/Backends/PTX/PTXBackend.cs | 72 ++++---- Src/ILGPU/Backends/PTX/PTXCodeGenerator.cs | 11 +- Src/ILGPU/Backends/PTX/PTXCompiledKernel.cs | 6 +- .../Backends/PTX/PTXFunctionGenerator.cs | 14 +- .../PTX/PTXKernelFunctionGenerator.cs | 14 +- Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt | 158 ++++++++++++++++++ Src/ILGPU/Context.Builder.cs | 26 ++- Src/ILGPU/ILGPU.csproj | 9 + Src/ILGPU/Runtime/Cuda/CudaAPI.cs | 72 +++++++- Src/ILGPU/Runtime/Cuda/CudaAPI.xml | 46 +++++ Src/ILGPU/Runtime/Cuda/CudaKernel.cs | 4 +- Src/ILGPU/Util/StringCache.cs | 84 ++++++++++ 15 files changed, 564 insertions(+), 63 deletions(-) create mode 100644 Src/ILGPU/Backends/PTX/PTXAssembly.cs create mode 100644 Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt create mode 100644 Src/ILGPU/Util/StringCache.cs diff --git a/.gitignore b/.gitignore index 5f0d1e8fa2..3f1e78435f 100644 --- a/.gitignore +++ b/.gitignore @@ -257,6 +257,7 @@ Src/ILGPU/AtomicFunctions.cs Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.cs Src/ILGPU/Backends/PTX/PTXLibDeviceMethods.cs Src/ILGPU/Backends/PTX/PTXLibDeviceNvvm.cs +Src/ILGPU/Backends/PTX/PTXLibDevicePtx.cs Src/ILGPU/Backends/Velocity/Scalar/ScalarOperations.cs Src/ILGPU/Backends/Velocity/VelocityIntrinsics.Generated.cs Src/ILGPU/Frontend/Intrinsic/RemappedIntrinsics.Generated.cs diff --git a/Samples/LibDeviceKernel/Program.cs b/Samples/LibDeviceKernel/Program.cs index 7f0d5d9d26..11d994a6fb 100644 --- a/Samples/LibDeviceKernel/Program.cs +++ b/Samples/LibDeviceKernel/Program.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU Samples -// Copyright (c) 2021 ILGPU Project +// Copyright 
(c) 2021-2024 ILGPU Project // www.ilgpu.net // // File: Program.cs @@ -28,8 +28,8 @@ public static void KernelWithLibDevice(Index1D index, ArrayView data) static void Main() { - // Create default context and enable LibDevice library - using var context = Context.Create(builder => builder.Cuda().LibDevice()); + // Create default context. + using var context = Context.Create(builder => builder.Cuda()); // For each available device... foreach (var device in context) diff --git a/Src/ILGPU/Backends/PTX/PTXAssembly.cs b/Src/ILGPU/Backends/PTX/PTXAssembly.cs new file mode 100644 index 0000000000..237addefef --- /dev/null +++ b/Src/ILGPU/Backends/PTX/PTXAssembly.cs @@ -0,0 +1,104 @@ +// --------------------------------------------------------------------------------------- +// ILGPU +// Copyright (c) 2024 ILGPU Project +// www.ilgpu.net +// +// File: PTXAssembly.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +using System; +using System.Collections.Immutable; +using System.Text; + +namespace ILGPU.Backends.PTX +{ + /// + /// Collection of PTX modules that are used to build a Cuda kernel. + /// + public sealed class PTXAssembly + { + #region Nested Types + + /// + /// A builder for a collection of PTX modules. + /// + public class Builder + { + #region Instance + + /// + /// List of PTX modules. + /// + private readonly ImmutableArray.Builder modules; + + /// + /// Constructs a new builder. + /// + internal Builder() + { + KernelBuilder = new StringBuilder(); + modules = ImmutableArray.CreateBuilder(1); + + // Add placeholder for kernel module. + modules.Add(string.Empty); + } + + #endregion + + #region Properties + + /// + /// Contains the definition of the kernel module. 
+ /// + public StringBuilder KernelBuilder { get; } + + #endregion + + #region Methods + + /// + /// Adds the PTX modules to the collection. + /// + public void AddModule(ReadOnlySpan ptxModules) => +#if NET7_0_OR_GREATER + modules.AddRange(ptxModules); +#else + modules.AddRange(ptxModules.ToArray()); +#endif + + /// + /// Constructs the completed collection of PTX modules. + /// + public PTXAssembly Seal() + { + // Replace placeholder string, so that the kernel is always at index 0. + modules[0] = KernelBuilder.ToString(); + return new PTXAssembly(modules.ToImmutable()); + } + + #endregion + } + + #endregion + + #region Instance + + /// + /// Collection of PTX modules. + /// + public ImmutableArray Modules { get; } + + /// + /// Constructs the list of PTX modules. + /// + internal PTXAssembly(ImmutableArray modules) + { + Modules = modules; + } + + #endregion + } +} diff --git a/Src/ILGPU/Backends/PTX/PTXBackend.cs b/Src/ILGPU/Backends/PTX/PTXBackend.cs index 3386d45a0c..c8c9880000 100644 --- a/Src/ILGPU/Backends/PTX/PTXBackend.cs +++ b/Src/ILGPU/Backends/PTX/PTXBackend.cs @@ -21,7 +21,6 @@ using System.Collections; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; -using System.Text; namespace ILGPU.Backends.PTX { @@ -50,7 +49,7 @@ public sealed class PTXBackend : PTXIntrinsic.Handler, PTXCodeGenerator.GeneratorArgs, PTXCodeGenerator, - StringBuilder> + PTXAssembly.Builder> { #region Constants @@ -126,7 +125,10 @@ public bool MoveNext() { while (enumerator.MoveNext()) { - if (!enumerator.Current.HasSource) + var current = enumerator.Current; + if (!current.HasSource) + continue; + if (current.Source.DeclaringType != typeof(PTXLibDeviceMethods)) continue; return true; } @@ -264,7 +266,7 @@ protected override void Dispose(bool disposing) /// instance. 
/// [SuppressMessage("Globalization", "CA1308:Normalize strings to uppercase")] - protected override StringBuilder CreateKernelBuilder( + protected override PTXAssembly.Builder CreateKernelBuilder( EntryPoint entryPoint, in BackendContext backendContext, in KernelSpecialization specialization, @@ -284,7 +286,8 @@ protected override StringBuilder CreateKernelBuilder( : new PTXDebugLineInfoGenerator(); } - var builder = new StringBuilder(); + var assemblyBuilder = new PTXAssembly.Builder(); + var builder = assemblyBuilder.KernelBuilder; builder.AppendLine("//"); builder.Append("// Generated by ILGPU v"); @@ -304,7 +307,7 @@ protected override StringBuilder CreateKernelBuilder( builder.AppendLine((PointerSize * 8).ToString()); builder.AppendLine(); - GenerateLibDeviceCode(backendContext, builder); + GenerateLibDeviceCode(backendContext, assemblyBuilder); // Check whether we are running in the O1 or O2 pipeline bool o1Enabled = Context.Properties.OptimizationLevel >= OptimizationLevel.O1; @@ -330,7 +333,7 @@ protected override StringBuilder CreateKernelBuilder( alignments, uniforms); - return builder; + return assemblyBuilder; } /// @@ -359,17 +362,16 @@ protected override PTXCodeGenerator CreateKernelCodeGenerator( protected override CompiledKernel CreateKernel( EntryPoint entryPoint, CompiledKernel.KernelInfo? kernelInfo, - StringBuilder builder, + PTXAssembly.Builder builder, PTXCodeGenerator.GeneratorArgs data) { - data.DebugInfoGenerator.GenerateDebugSections(builder); + data.DebugInfoGenerator.GenerateDebugSections(builder.KernelBuilder); - var ptxAssembly = builder.ToString(); return new PTXCompiledKernel( Context, entryPoint, kernelInfo, - ptxAssembly); + builder.Seal()); } /// @@ -379,27 +381,41 @@ protected override CompiledKernel CreateKernel( /// The kernel builder. 
private void GenerateLibDeviceCode( in BackendContext backendContext, - StringBuilder builder) + PTXAssembly.Builder builder) { - if (NvvmAPI == null || backendContext.Count == 0) + if (backendContext.Count == 0) return; using var enumerator = new LibDeviceEnumerator(backendContext); - PTXLibDevice.GenerateLibDeviceCode( - NvvmAPI, - Architecture, - enumerator.AsEnumerable(), - out var ptx); - - var compiledString = - ptx.AsNotNull() - .Replace(".version", "//.version", StringComparison.Ordinal) - .Replace(".target", "//.target", StringComparison.Ordinal) - .Replace( - ".address_size", - "//.address_size", - StringComparison.Ordinal); - builder.Append(compiledString); + if (NvvmAPI != null) + { + PTXLibDevice.GenerateLibDeviceCode( + NvvmAPI, + Architecture, + enumerator.AsEnumerable(), + out var ptx); + + var compiledString = + ptx.AsNotNull() + .Replace(".version", "//.version", StringComparison.Ordinal) + .Replace(".target", "//.target", StringComparison.Ordinal) + .Replace( + ".address_size", + "//.address_size", + StringComparison.Ordinal); + builder.KernelBuilder.Append(compiledString); + } + else + { + var ptxModules = InlineList.Create(backendContext.Count); + PTXLibDevicePtx.GetPtx( + enumerator.AsEnumerable(), + ref ptxModules, + out var ptxDeclarations); + + builder.AddModule(ptxModules.AsReadOnlySpan()); + builder.KernelBuilder.AppendLine(ptxDeclarations); + } } #endregion diff --git a/Src/ILGPU/Backends/PTX/PTXCodeGenerator.cs b/Src/ILGPU/Backends/PTX/PTXCodeGenerator.cs index 74f8b8ea7b..3a1425e393 100644 --- a/Src/ILGPU/Backends/PTX/PTXCodeGenerator.cs +++ b/Src/ILGPU/Backends/PTX/PTXCodeGenerator.cs @@ -31,7 +31,7 @@ namespace ILGPU.Backends.PTX /// The code needs to be prepared for this code generator. 
public abstract partial class PTXCodeGenerator : PTXRegisterAllocator, - IBackendCodeGenerator + IBackendCodeGenerator { #region Constants @@ -404,7 +404,7 @@ public IntrinsicImplementationProvider /// /// Generates a function declaration in PTX code. /// - public abstract void GenerateHeader(StringBuilder builder); + public abstract void GenerateHeader(PTXAssembly.Builder builder); /// /// Generates PTX code. @@ -415,11 +415,12 @@ public IntrinsicImplementationProvider /// Generates PTX constant declarations. /// /// The target builder. - public void GenerateConstants(StringBuilder builder) => - builder.Append(GenerateConstantDeclarations()); + public void GenerateConstants(PTXAssembly.Builder builder) => + builder.KernelBuilder.Append(GenerateConstantDeclarations()); /// - public void Merge(StringBuilder builder) => builder.Append(Builder); + public void Merge(PTXAssembly.Builder builder) => + builder.KernelBuilder.Append(Builder); #endregion diff --git a/Src/ILGPU/Backends/PTX/PTXCompiledKernel.cs b/Src/ILGPU/Backends/PTX/PTXCompiledKernel.cs index 606f49026f..33cabca12a 100644 --- a/Src/ILGPU/Backends/PTX/PTXCompiledKernel.cs +++ b/Src/ILGPU/Backends/PTX/PTXCompiledKernel.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2018-2023 ILGPU Project +// Copyright (c) 2018-2024 ILGPU Project // www.ilgpu.net // // File: PTXCompiledKernel.cs @@ -31,7 +31,7 @@ internal PTXCompiledKernel( Context context, EntryPoint entryPoint, KernelInfo? info, - string ptxAssembly) + PTXAssembly ptxAssembly) : base(context, entryPoint, info) { PTXAssembly = ptxAssembly; @@ -44,7 +44,7 @@ internal PTXCompiledKernel( /// /// Returns the PTX assembly code. 
/// - public string PTXAssembly { get; } + public PTXAssembly PTXAssembly { get; } #endregion } diff --git a/Src/ILGPU/Backends/PTX/PTXFunctionGenerator.cs b/Src/ILGPU/Backends/PTX/PTXFunctionGenerator.cs index ad36fdeb85..4d7c848276 100644 --- a/Src/ILGPU/Backends/PTX/PTXFunctionGenerator.cs +++ b/Src/ILGPU/Backends/PTX/PTXFunctionGenerator.cs @@ -78,16 +78,16 @@ private List GenerateHeaderDeclaration( /// /// Generates a function declaration in PTX code. /// - public override void GenerateHeader(StringBuilder builder) + public override void GenerateHeader(PTXAssembly.Builder builder) { - static bool IsLibDeviceMethod(Method method) => - method.HasSource && - method.Source.DeclaringType == typeof(PTXLibDeviceMethods); - if (IsLibDeviceMethod(Method)) + if (Method.HasSource && + Method.Source.DeclaringType == typeof(PTXLibDeviceMethods)) + { return; + } - GenerateHeaderDeclaration(builder); - builder.AppendLine(";"); + GenerateHeaderDeclaration(builder.KernelBuilder); + builder.KernelBuilder.AppendLine(";"); } /// diff --git a/Src/ILGPU/Backends/PTX/PTXKernelFunctionGenerator.cs b/Src/ILGPU/Backends/PTX/PTXKernelFunctionGenerator.cs index 413f694ba1..8231293f3c 100644 --- a/Src/ILGPU/Backends/PTX/PTXKernelFunctionGenerator.cs +++ b/Src/ILGPU/Backends/PTX/PTXKernelFunctionGenerator.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2018-2023 ILGPU Project +// Copyright (c) 2018-2024 ILGPU Project // www.ilgpu.net // // File: PTXKernelFunctionGenerator.cs @@ -115,7 +115,7 @@ public PTXKernelFunctionGenerator( /// /// Generates a function declaration in PTX code. 
/// - public override void GenerateHeader(StringBuilder builder) + public override void GenerateHeader(PTXAssembly.Builder builder) { // Generate global dynamic shared memory allocation information if (!EntryPoint.SharedMemory.HasDynamicMemory) @@ -134,11 +134,11 @@ public override void GenerateHeader(StringBuilder builder) PTXBackend.DefaultGlobalMemoryAlignment); // Use the proper alignment that is compatible with all types - builder.Append(".extern .shared .align "); - builder.Append(sharedAlignmentInBytes); - builder.Append(" .b8 "); - builder.Append(DynamicSharedMemoryAllocationName); - builder.AppendLine("[];"); + builder.KernelBuilder.Append(".extern .shared .align "); + builder.KernelBuilder.Append(sharedAlignmentInBytes); + builder.KernelBuilder.Append(" .b8 "); + builder.KernelBuilder.Append(DynamicSharedMemoryAllocationName); + builder.KernelBuilder.AppendLine("[];"); } /// diff --git a/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt b/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt new file mode 100644 index 0000000000..eccffabca1 --- /dev/null +++ b/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt @@ -0,0 +1,158 @@ +// --------------------------------------------------------------------------------------- +// ILGPU +// Copyright (c) 2024 ILGPU Project +// www.ilgpu.net +// +// File: PTXLibDevicePtx.tt/PTXLibDevicePtx.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. 
+// --------------------------------------------------------------------------------------- + +<#@ template debug="false" hostspecific="true" language="C#" #> +<#@ include file="../../Static/TextTransformHelpers.ttinclude" #> +<#@ include file="../../Static/TypeInformation.ttinclude" #> +<#@ include file="../../Static/CudaLibDevice.ttinclude" #> +<#@ assembly name="System.Core" #> +<#@ import namespace="System.Linq" #> +<#@ import namespace="System.Text" #> +<#@ import namespace="System.Collections.Generic" #> +<#@ output extension=".cs" #> +<# +string rootPath = Host.ResolvePath("."); +var functions = LibDevicePtxFunctions.Load(rootPath, "../../Static/CudaLibDevicePtx.xml"); +#> + +using ILGPU.Util; +using System.Collections.Generic; +using System.Text; + +// disable: max_line_length + +namespace ILGPU.Backends.PTX +{ + /// + /// Contains the pre-generated PTX for Cuda LibDevice functions. + /// + internal static class PTXLibDevicePtx + { + #region Generated PTX + +<# + foreach (var func in functions) + { + PushIndent(2); + WriteLibDeviceFunctionPtx(func); + PopIndent(); + } + +#> + #endregion + + private static readonly Dictionary fragments = + new Dictionary() + { +<# + PushIndent(4); + foreach (var func in functions) + WriteLine($"{{ \"{func.Name}\", {func.Name} }},"); + PopIndent(); +#> + }; + + private static readonly Dictionary headers = + new Dictionary() + { +<# + PushIndent(4); + foreach (var func in functions) + WriteLine($"{{ \"{func.Name}\", {func.Name}{DeclarationSuffix} }},"); + PopIndent(); +#> + }; + + /// + /// Returns the pre-generated PTX code for the Cuda LibDevice functions. + /// + /// The LibDevice method names. + /// Filled in with the PTX modules. + /// Filled in with the PTX declarations. + /// The PTX modules. 
+ public static void GetPtx( + IEnumerable methods, + ref InlineList ptxModules, + out string ptxDeclarations) + { + var declarationBuilder = new StringBuilder(); + + foreach (var methodName in methods) + { + if (fragments.TryGetValue(methodName, out var methodPtx)) + { + if (headers.TryGetValue(methodName, out var header)) + declarationBuilder.AppendLine(header); + ptxModules.Add(methodPtx); + } + } + + ptxDeclarations = declarationBuilder.ToString(); + } + } +} +<#+ + +public const string DeclarationSuffix = "_decl"; + +void WriteLibDeviceFunctionPtx(LibDevicePtxFunction func) +{ + // PTX declaration + WriteLine($"private const string {func.Name}{DeclarationSuffix} ="); + + PopIndent(); + PopIndent(); + WriteLine("\"\"\""); + + WriteLine(func.PtxDeclaration); + WriteLine("\"\"\";"); + WriteLine(); + + PushIndent(2); + + // PTX module + WriteLine($"private const string {func.Name} ="); + + PopIndent(); + PopIndent(); + WriteLine("\"\"\""); + + WriteLine(func.PtxModule); + WriteLine("\"\"\";"); + WriteLine(); + + PushIndent(); +} + +[XmlRoot("LibDevicePtx")] +public class LibDevicePtxFunctions +{ + internal static LibDevicePtxFunction[] Load(string rootPath, string fileName) => + XmlHelper.Load(rootPath, fileName).Functions; + + [XmlElement("Function")] + public LibDevicePtxFunction[] Functions { get; set; } +} + +[XmlRoot("Function")] +public class LibDevicePtxFunction +{ + [XmlAttribute] + public string Name { get; set; } + + [XmlAttribute] + public string PtxModule { get; set; } + + [XmlAttribute] + public string PtxDeclaration { get; set; } +} + +#> \ No newline at end of file diff --git a/Src/ILGPU/Context.Builder.cs b/Src/ILGPU/Context.Builder.cs index 3064ed90ae..4823f00f94 100644 --- a/Src/ILGPU/Context.Builder.cs +++ b/Src/ILGPU/Context.Builder.cs @@ -323,7 +323,17 @@ public Builder Profiling() /// Automatically detects the CUDA SDK location. /// /// The current builder instance. 
- public Builder LibDevice() + [Obsolete("LibDevice is now embedded into ILGPU. Use LibDeviceOverride() if" + + " you want to load a specific version of LibDevice at runtime.")] + public Builder LibDevice() => + this; + + /// + /// Overrides the version of LibDevice embedded into ILGPU, and loads from the + /// CUDA SDK at runtime. Automatically detects the CUDA SDK location. + /// + /// The current builder instance. + public Builder LibDeviceOverride() { PTXLibDevice.FindLibDevicePaths( out var cudaEnvName, @@ -357,7 +367,19 @@ public Builder LibDevice() /// Path to LibNvvm DLL. /// Path to LibDevice bitcode. /// The current builder instance. - public Builder LibDevice(string libNvvmPath, string libDevicePath) + [Obsolete("LibDevice is now embedded into ILGPU. Use LibDeviceOverride() if" + + " you want to load a specific version of LibDevice at runtime.")] + public Builder LibDevice(string libNvvmPath, string libDevicePath) => + this; + + /// + /// Overrides the version of LibDevice embedded into ILGPU, and loads from the + /// CUDA SDK at runtime. Explicitly specifies the LibDevice location. + /// + /// Path to LibNvvm DLL. + /// Path to LibDevice bitcode. + /// The current builder instance. 
+ public Builder LibDeviceOverride(string libNvvmPath, string libDevicePath) { LibNvvmPath = libNvvmPath; LibDevicePath = libDevicePath; diff --git a/Src/ILGPU/ILGPU.csproj b/Src/ILGPU/ILGPU.csproj index 4228847f22..3bf9267d85 100644 --- a/Src/ILGPU/ILGPU.csproj +++ b/Src/ILGPU/ILGPU.csproj @@ -173,6 +173,10 @@ True PrimitiveDataBlocks.tt + + TextTemplatingFileGenerator + PTXLibDevicePtx.cs + TextTemplatingFileGenerator CudaArchitecture.Generated.cs @@ -198,6 +202,11 @@ True PTXLibDeviceNvvm.tt + + True + True + PTXLibDevicePtx.tt + True True diff --git a/Src/ILGPU/Runtime/Cuda/CudaAPI.cs b/Src/ILGPU/Runtime/Cuda/CudaAPI.cs index 98e6053d73..d25cb9a18e 100644 --- a/Src/ILGPU/Runtime/Cuda/CudaAPI.cs +++ b/Src/ILGPU/Runtime/Cuda/CudaAPI.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2020-2023 ILGPU Project +// Copyright (c) 2020-2024 ILGPU Project // www.ilgpu.net // // File: CudaAPI.cs @@ -10,6 +10,7 @@ // --------------------------------------------------------------------------------------- using ILGPU.Resources; +using ILGPU.Util; using System; using System.Diagnostics; using System.Runtime.CompilerServices; @@ -573,6 +574,19 @@ public CudaError LoadModule( public unsafe CudaError LoadModule( out IntPtr kernelModule, string moduleData, + out string? errorLog) => + LoadModule(out kernelModule, new string[] { moduleData }, out errorLog); + + /// + /// Loads the given kernel module into driver memory. + /// + /// The loaded module. + /// The module data to load. + /// The error log. + /// The error status. + public unsafe CudaError LoadModule( + out IntPtr kernelModule, + ReadOnlySpan modules, out string? 
errorLog) { const int BufferSize = 1024; @@ -591,16 +605,62 @@ public unsafe CudaError LoadModule( values[0] = errorBuffer; values[1] = (void*)BufferSize; - var result = LoadModule( - out kernelModule, - moduleData, + var result = CurrentAPI.cuLinkCreate_v2( NumOptions, new IntPtr(options), - new IntPtr(optionValues)); - + new IntPtr(optionValues), + out IntPtr linkState); errorLog = result != CudaError.CUDA_SUCCESS ? Encoding.ASCII.GetString(errorBuffer, BufferSize) : null; + CudaException.ThrowIfFailed(result); + + using var stringCache = new StringCache(); + try + { + foreach (var module in modules) + { + var moduleStrEntry = stringCache.AddString(module); + + result = CurrentAPI.cuLinkAddData_v2( + linkState, + new IntPtr(1), // CU_JIT_INPUT_PTX + moduleStrEntry.NativePtr, + new IntPtr(moduleStrEntry.Length), + string.Empty, + 0, + IntPtr.Zero, + IntPtr.Zero); + errorLog = result != CudaError.CUDA_SUCCESS + ? Encoding.ASCII.GetString(errorBuffer, BufferSize) + : null; + CudaException.ThrowIfFailed(result); + } + + result = CurrentAPI.cuLinkComplete( + linkState, + out IntPtr cubin, + out IntPtr cubinSize); + errorLog = result != CudaError.CUDA_SUCCESS + ? Encoding.ASCII.GetString(errorBuffer, BufferSize) + : null; + CudaException.ThrowIfFailed(result); + + result = CurrentAPI.cuModuleLoadDataEx( + out kernelModule, + cubin, + NumOptions, + new IntPtr(options), + new IntPtr(optionValues)); + errorLog = result != CudaError.CUDA_SUCCESS + ? 
Encoding.ASCII.GetString(errorBuffer, BufferSize) + : null; + } + finally + { + CurrentAPI.cuLinkDestroy(linkState); + } + return result; } diff --git a/Src/ILGPU/Runtime/Cuda/CudaAPI.xml b/Src/ILGPU/Runtime/Cuda/CudaAPI.xml index ea09134376..21da259bac 100644 --- a/Src/ILGPU/Runtime/Cuda/CudaAPI.xml +++ b/Src/ILGPU/Runtime/Cuda/CudaAPI.xml @@ -150,6 +150,10 @@ + + + + @@ -158,6 +162,13 @@ + + + + + + + @@ -180,6 +191,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Src/ILGPU/Runtime/Cuda/CudaKernel.cs b/Src/ILGPU/Runtime/Cuda/CudaKernel.cs index 3f5c6c07c6..6415f45750 100644 --- a/Src/ILGPU/Runtime/Cuda/CudaKernel.cs +++ b/Src/ILGPU/Runtime/Cuda/CudaKernel.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2017-2023 ILGPU Project +// Copyright (c) 2017-2024 ILGPU Project // www.ilgpu.net // // File: CudaKernel.cs @@ -50,7 +50,7 @@ internal CudaKernel( { var kernelLoaded = CurrentAPI.LoadModule( out modulePtr, - kernel.PTXAssembly, + kernel.PTXAssembly.Modules.AsSpan(), out string? errorLog); if (kernelLoaded != CudaError.CUDA_SUCCESS) { diff --git a/Src/ILGPU/Util/StringCache.cs b/Src/ILGPU/Util/StringCache.cs new file mode 100644 index 0000000000..bdb7dc7958 --- /dev/null +++ b/Src/ILGPU/Util/StringCache.cs @@ -0,0 +1,84 @@ +// --------------------------------------------------------------------------------------- +// ILGPU +// Copyright (c) 2024 ILGPU Project +// www.ilgpu.net +// +// File: StringCache.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. 
+// ---------------------------------------------------------------------------------------
+
+using System;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace ILGPU.Util
+{
+    /// <summary>
+    /// Maintains a cache of strings that have been marshalled to native strings
+    /// and need to be kept in memory.
+    /// </summary>
+    internal class StringCache : DisposeBase
+    {
+        #region Nested Types
+
+        public readonly struct StringCacheEntry
+        {
+            public IntPtr NativePtr { get; }
+            public int Length { get; }
+
+            public StringCacheEntry(IntPtr ptr, int length)
+            {
+                NativePtr = ptr;
+                Length = length;
+            }
+        }
+
+        #endregion
+
+        #region Instance
+
+        private InlineList<StringCacheEntry> _cache =
+            InlineList<StringCacheEntry>.Create(1);
+
+        #endregion
+
+        #region Methods
+
+        /// <summary>
+        /// Adds a string to the cache, and returns the native pointer and length.
+        /// </summary>
+        /// <param name="value">The string to convert.</param>
+        /// <returns>The native pointer and length.</returns>
+        public unsafe StringCacheEntry AddString(string value)
+        {
+            // Create null-terminated native string
+            var len = Encoding.ASCII.GetMaxByteCount(value.Length);
+            var ptr = Marshal.AllocHGlobal(len + 1);
+            var ptrSpan = new Span<byte>((void*)ptr, len);
+
+            len = Encoding.ASCII.GetBytes(value, ptrSpan);
+            ptrSpan[len] = 0;
+
+            // Keep the entry for Dispose; _cache is mutated here, so it is not readonly.
+            var entry = new StringCacheEntry(ptr, len);
+            _cache.Add(entry);
+            return entry;
+        }
+
+        #endregion
+
+        #region IDisposable
+
+        /// <inheritdoc/>
+        protected override void Dispose(bool disposing)
+        {
+            foreach (var entry in _cache)
+                Marshal.FreeHGlobal(entry.NativePtr);
+            base.Dispose(disposing);
+        }
+
+        #endregion
+    }
+}