Skip to content

Commit

Permalink
Implemented support for pre-generated LibDevice PTX modules.
Browse files Browse the repository at this point in the history
  • Loading branch information
MoFtZ committed Jan 12, 2024
1 parent 9a2bc13 commit 8afc5d7
Show file tree
Hide file tree
Showing 15 changed files with 564 additions and 63 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ Src/ILGPU/AtomicFunctions.cs
Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.cs
Src/ILGPU/Backends/PTX/PTXLibDeviceMethods.cs
Src/ILGPU/Backends/PTX/PTXLibDeviceNvvm.cs
Src/ILGPU/Backends/PTX/PTXLibDevicePtx.cs
Src/ILGPU/Backends/Velocity/Scalar/ScalarOperations.cs
Src/ILGPU/Backends/Velocity/VelocityIntrinsics.Generated.cs
Src/ILGPU/Frontend/Intrinsic/RemappedIntrinsics.Generated.cs
Expand Down
6 changes: 3 additions & 3 deletions Samples/LibDeviceKernel/Program.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU Samples
// Copyright (c) 2021 ILGPU Project
// Copyright (c) 2021-2024 ILGPU Project
// www.ilgpu.net
//
// File: Program.cs
Expand Down Expand Up @@ -28,8 +28,8 @@ public static void KernelWithLibDevice(Index1D index, ArrayView<float> data)

static void Main()
{
// Create default context and enable LibDevice library
using var context = Context.Create(builder => builder.Cuda().LibDevice());
// Create default context.
using var context = Context.Create(builder => builder.Cuda());

// For each available device...
foreach (var device in context)
Expand Down
104 changes: 104 additions & 0 deletions Src/ILGPU/Backends/PTX/PTXAssembly.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXAssembly.cs
//
// This file is part of ILGPU and is distributed under the University of Illinois Open
// Source License. See LICENSE.txt for details.
// ---------------------------------------------------------------------------------------

using System;
using System.Collections.Immutable;
using System.Text;

namespace ILGPU.Backends.PTX
{
/// <summary>
/// Represents the set of PTX modules that are combined to build a Cuda kernel. The
/// kernel module is always located at index 0 of <see cref="Modules"/>.
/// </summary>
public sealed class PTXAssembly
{
    #region Nested Types

    /// <summary>
    /// Incrementally gathers the PTX modules of a <see cref="PTXAssembly"/>.
    /// </summary>
    public class Builder
    {
        #region Instance

        /// <summary>
        /// The PTX modules gathered so far (slot 0 is reserved for the kernel).
        /// </summary>
        private readonly ImmutableArray<string>.Builder ptxModuleList =
            ImmutableArray.CreateBuilder<string>(1);

        /// <summary>
        /// Constructs a new builder.
        /// </summary>
        internal Builder()
        {
            // Reserve slot 0 for the kernel module; Seal() fills it in later.
            ptxModuleList.Add(string.Empty);
        }

        #endregion

        #region Properties

        /// <summary>
        /// Receives the text of the kernel module.
        /// </summary>
        public StringBuilder KernelBuilder { get; } = new StringBuilder();

        #endregion

        #region Methods

        /// <summary>
        /// Appends the given PTX modules to the collection.
        /// </summary>
        /// <param name="ptxModules">The PTX modules to append.</param>
        public void AddModule(ReadOnlySpan<string> ptxModules)
        {
#if NET7_0_OR_GREATER
            ptxModuleList.AddRange(ptxModules);
#else
            ptxModuleList.AddRange(ptxModules.ToArray());
#endif
        }

        /// <summary>
        /// Finalizes the collection of PTX modules.
        /// </summary>
        /// <returns>
        /// The assembly holding the kernel module at index 0, followed by all
        /// modules that have been added to this builder.
        /// </returns>
        public PTXAssembly Seal()
        {
            // Swap the placeholder for the actual kernel text, which keeps the
            // kernel module at index 0.
            ptxModuleList[0] = KernelBuilder.ToString();

            var sealedModules = ptxModuleList.ToImmutable();
            return new PTXAssembly(sealedModules);
        }

        #endregion
    }

    #endregion

    #region Instance

    /// <summary>
    /// The PTX modules of this assembly.
    /// </summary>
    public ImmutableArray<string> Modules { get; }

    /// <summary>
    /// Constructs a new assembly from the given PTX modules.
    /// </summary>
    /// <param name="modules">The PTX modules to store.</param>
    internal PTXAssembly(ImmutableArray<string> modules) => Modules = modules;

    #endregion
}
}
72 changes: 44 additions & 28 deletions Src/ILGPU/Backends/PTX/PTXBackend.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Text;

namespace ILGPU.Backends.PTX
{
Expand Down Expand Up @@ -50,7 +49,7 @@ public sealed class PTXBackend :
PTXIntrinsic.Handler,
PTXCodeGenerator.GeneratorArgs,
PTXCodeGenerator,
StringBuilder>
PTXAssembly.Builder>
{
#region Constants

Expand Down Expand Up @@ -126,7 +125,10 @@ public bool MoveNext()
{
while (enumerator.MoveNext())
{
if (!enumerator.Current.HasSource)
var current = enumerator.Current;
if (!current.HasSource)
continue;
if (current.Source.DeclaringType != typeof(PTXLibDeviceMethods))
continue;
return true;
}
Expand Down Expand Up @@ -264,7 +266,7 @@ protected override void Dispose(bool disposing)
/// <see cref="PTXCodeGenerator.GeneratorArgs"/> instance.
/// </summary>
[SuppressMessage("Globalization", "CA1308:Normalize strings to uppercase")]
protected override StringBuilder CreateKernelBuilder(
protected override PTXAssembly.Builder CreateKernelBuilder(
EntryPoint entryPoint,
in BackendContext backendContext,
in KernelSpecialization specialization,
Expand All @@ -284,7 +286,8 @@ protected override StringBuilder CreateKernelBuilder(
: new PTXDebugLineInfoGenerator();
}

var builder = new StringBuilder();
var assemblyBuilder = new PTXAssembly.Builder();
var builder = assemblyBuilder.KernelBuilder;

builder.AppendLine("//");
builder.Append("// Generated by ILGPU v");
Expand All @@ -304,7 +307,7 @@ protected override StringBuilder CreateKernelBuilder(
builder.AppendLine((PointerSize * 8).ToString());
builder.AppendLine();

GenerateLibDeviceCode(backendContext, builder);
GenerateLibDeviceCode(backendContext, assemblyBuilder);

// Check whether we are running in the O1 or O2 pipeline
bool o1Enabled = Context.Properties.OptimizationLevel >= OptimizationLevel.O1;
Expand All @@ -330,7 +333,7 @@ protected override StringBuilder CreateKernelBuilder(
alignments,
uniforms);

return builder;
return assemblyBuilder;
}

/// <summary>
Expand Down Expand Up @@ -359,17 +362,16 @@ protected override PTXCodeGenerator CreateKernelCodeGenerator(
protected override CompiledKernel CreateKernel(
EntryPoint entryPoint,
CompiledKernel.KernelInfo? kernelInfo,
StringBuilder builder,
PTXAssembly.Builder builder,
PTXCodeGenerator.GeneratorArgs data)
{
data.DebugInfoGenerator.GenerateDebugSections(builder);
data.DebugInfoGenerator.GenerateDebugSections(builder.KernelBuilder);

var ptxAssembly = builder.ToString();
return new PTXCompiledKernel(
Context,
entryPoint,
kernelInfo,
ptxAssembly);
builder.Seal());
}

/// <summary>
Expand All @@ -379,27 +381,41 @@ protected override CompiledKernel CreateKernel(
/// <param name="builder">The kernel builder.</param>
private void GenerateLibDeviceCode(
in BackendContext backendContext,
StringBuilder builder)
PTXAssembly.Builder builder)
{
if (NvvmAPI == null || backendContext.Count == 0)
if (backendContext.Count == 0)
return;

using var enumerator = new LibDeviceEnumerator(backendContext);
PTXLibDevice.GenerateLibDeviceCode(
NvvmAPI,
Architecture,
enumerator.AsEnumerable(),
out var ptx);

var compiledString =
ptx.AsNotNull()
.Replace(".version", "//.version", StringComparison.Ordinal)
.Replace(".target", "//.target", StringComparison.Ordinal)
.Replace(
".address_size",
"//.address_size",
StringComparison.Ordinal);
builder.Append(compiledString);
if (NvvmAPI != null)
{
PTXLibDevice.GenerateLibDeviceCode(
NvvmAPI,
Architecture,
enumerator.AsEnumerable(),
out var ptx);

var compiledString =
ptx.AsNotNull()
.Replace(".version", "//.version", StringComparison.Ordinal)
.Replace(".target", "//.target", StringComparison.Ordinal)
.Replace(
".address_size",
"//.address_size",
StringComparison.Ordinal);
builder.KernelBuilder.Append(compiledString);
}
else
{
var ptxModules = InlineList<string>.Create(backendContext.Count);
PTXLibDevicePtx.GetPtx(
enumerator.AsEnumerable(),
ref ptxModules,
out var ptxDeclarations);

builder.AddModule(ptxModules.AsReadOnlySpan());
builder.KernelBuilder.AppendLine(ptxDeclarations);
}
}

#endregion
Expand Down
11 changes: 6 additions & 5 deletions Src/ILGPU/Backends/PTX/PTXCodeGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace ILGPU.Backends.PTX
/// <remarks>The code needs to be prepared for this code generator.</remarks>
public abstract partial class PTXCodeGenerator :
PTXRegisterAllocator,
IBackendCodeGenerator<StringBuilder>
IBackendCodeGenerator<PTXAssembly.Builder>
{
#region Constants

Expand Down Expand Up @@ -404,7 +404,7 @@ public IntrinsicImplementationProvider<PTXIntrinsic.Handler>
/// <summary>
/// Generates a function declaration in PTX code.
/// </summary>
public abstract void GenerateHeader(StringBuilder builder);
public abstract void GenerateHeader(PTXAssembly.Builder builder);

/// <summary>
/// Generates PTX code.
Expand All @@ -415,11 +415,12 @@ public IntrinsicImplementationProvider<PTXIntrinsic.Handler>
/// Generates PTX constant declarations.
/// </summary>
/// <param name="builder">The target builder.</param>
public void GenerateConstants(StringBuilder builder) =>
builder.Append(GenerateConstantDeclarations());
public void GenerateConstants(PTXAssembly.Builder builder) =>
builder.KernelBuilder.Append(GenerateConstantDeclarations());

/// <summary cref="IBackendCodeGenerator{TKernelBuilder}.Merge(TKernelBuilder)"/>
public void Merge(StringBuilder builder) => builder.Append(Builder);
public void Merge(PTXAssembly.Builder builder) =>
builder.KernelBuilder.Append(Builder);

#endregion

Expand Down
6 changes: 3 additions & 3 deletions Src/ILGPU/Backends/PTX/PTXCompiledKernel.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2018-2023 ILGPU Project
// Copyright (c) 2018-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXCompiledKernel.cs
Expand Down Expand Up @@ -31,7 +31,7 @@ internal PTXCompiledKernel(
Context context,
EntryPoint entryPoint,
KernelInfo? info,
string ptxAssembly)
PTXAssembly ptxAssembly)
: base(context, entryPoint, info)
{
PTXAssembly = ptxAssembly;
Expand All @@ -44,7 +44,7 @@ internal PTXCompiledKernel(
/// <summary>
/// Returns the PTX assembly code.
/// </summary>
public string PTXAssembly { get; }
public PTXAssembly PTXAssembly { get; }

#endregion
}
Expand Down
14 changes: 7 additions & 7 deletions Src/ILGPU/Backends/PTX/PTXFunctionGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,16 @@ private List<MappedParameter> GenerateHeaderDeclaration(
/// <summary>
/// Generates a function declaration in PTX code.
/// </summary>
public override void GenerateHeader(StringBuilder builder)
public override void GenerateHeader(PTXAssembly.Builder builder)
{
static bool IsLibDeviceMethod(Method method) =>
method.HasSource &&
method.Source.DeclaringType == typeof(PTXLibDeviceMethods);
if (IsLibDeviceMethod(Method))
if (Method.HasSource &&
Method.Source.DeclaringType == typeof(PTXLibDeviceMethods))
{
return;
}

GenerateHeaderDeclaration(builder);
builder.AppendLine(";");
GenerateHeaderDeclaration(builder.KernelBuilder);
builder.KernelBuilder.AppendLine(";");
}

/// <summary>
Expand Down
14 changes: 7 additions & 7 deletions Src/ILGPU/Backends/PTX/PTXKernelFunctionGenerator.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2018-2023 ILGPU Project
// Copyright (c) 2018-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXKernelFunctionGenerator.cs
Expand Down Expand Up @@ -115,7 +115,7 @@ public PTXKernelFunctionGenerator(
/// <summary>
/// Generates a function declaration in PTX code.
/// </summary>
public override void GenerateHeader(StringBuilder builder)
public override void GenerateHeader(PTXAssembly.Builder builder)
{
// Generate global dynamic shared memory allocation information
if (!EntryPoint.SharedMemory.HasDynamicMemory)
Expand All @@ -134,11 +134,11 @@ public override void GenerateHeader(StringBuilder builder)
PTXBackend.DefaultGlobalMemoryAlignment);

// Use the proper alignment that is compatible with all types
builder.Append(".extern .shared .align ");
builder.Append(sharedAlignmentInBytes);
builder.Append(" .b8 ");
builder.Append(DynamicSharedMemoryAllocationName);
builder.AppendLine("[];");
builder.KernelBuilder.Append(".extern .shared .align ");
builder.KernelBuilder.Append(sharedAlignmentInBytes);
builder.KernelBuilder.Append(" .b8 ");
builder.KernelBuilder.Append(DynamicSharedMemoryAllocationName);
builder.KernelBuilder.AppendLine("[];");
}

/// <summary>
Expand Down
Loading

0 comments on commit 8afc5d7

Please sign in to comment.