Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimized PTX IntrinsicMath implementation to use LibDevice. #1189

Merged
merged 3 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions Src/ILGPU.Algorithms/PTX/PTXContext.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU Algorithms
// Copyright (c) 2019-2023 ILGPU Project
// Copyright (c) 2019-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXContext.cs
Expand Down Expand Up @@ -46,7 +46,8 @@ static partial class PTXContext
private static readonly PTXIntrinsic MathCodeGeneratorIntrinsic =
new PTXIntrinsic(
MathCodeGenerator,
IntrinsicImplementationMode.GenerateCode)
IntrinsicImplementationMode.GenerateCode,
libDeviceRequired: false)
.ThrowIfNull();

/// <summary>
Expand All @@ -70,7 +71,8 @@ private static PTXIntrinsic GetMathCodeGeneratorIntrinsic(
PTXMathType,
nameof(PTXMath.GenerateMathIntrinsic),
IntrinsicImplementationMode.GenerateCode,
minArchitecture);
minArchitecture,
libDeviceRequired: false);

/// <summary>
/// Resolves a PTX intrinsic for the given math-function configuration.
Expand All @@ -87,7 +89,10 @@ private static PTXIntrinsic GetMathIntrinsic(string name, params Type[] types)
types,
null)
.ThrowIfNull();
return new PTXIntrinsic(targetMethod, IntrinsicImplementationMode.Redirect);
return new PTXIntrinsic(
targetMethod,
IntrinsicImplementationMode.Redirect,
libDeviceRequired: false);
}

/// <summary>
Expand Down
97 changes: 95 additions & 2 deletions Src/ILGPU/Backends/PTX/PTXIntrinsic.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2019-2021 ILGPU Project
// Copyright (c) 2019-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXIntrinsic.cs
Expand Down Expand Up @@ -52,6 +52,66 @@ public PTXIntrinsic(MethodInfo targetMethod, IntrinsicImplementationMode mode)
mode)
{ }

/// <summary>
/// Creates a PTX intrinsic bound to a target method that is limited to
/// architectures greater than or equal to <paramref name="minArchitecture"/>.
/// </summary>
/// <param name="targetMethod">The method implementing the intrinsic.</param>
/// <param name="mode">The code-generation mode.</param>
/// <param name="minArchitecture">The lowest supported architecture.</param>
public PTXIntrinsic(
    MethodInfo targetMethod,
    IntrinsicImplementationMode mode,
    CudaArchitecture minArchitecture)
    : base(BackendType.PTX, targetMethod, mode)
{
    // MaxArchitecture and LibDeviceRequired are left unset by this overload.
    MinArchitecture = minArchitecture;
}

/// <summary>
/// Creates a PTX intrinsic bound to a target method with an optional
/// architecture range.
/// </summary>
/// <param name="targetMethod">The method implementing the intrinsic.</param>
/// <param name="mode">The code-generation mode.</param>
/// <param name="minArchitecture">
/// The lowest supported architecture (inclusive), or null for no lower bound.
/// </param>
/// <param name="maxArchitecture">
/// The upper architecture bound (exclusive), or null for no upper bound.
/// </param>
public PTXIntrinsic(
    MethodInfo targetMethod,
    IntrinsicImplementationMode mode,
    CudaArchitecture? minArchitecture,
    CudaArchitecture? maxArchitecture)
    : base(BackendType.PTX, targetMethod, mode)
{
    MaxArchitecture = maxArchitecture;
    MinArchitecture = minArchitecture;
}

/// <summary>
/// Creates a PTX intrinsic bound to a target method whose applicability depends
/// on whether the backend provides an NVVM API.
/// </summary>
/// <param name="targetMethod">The method implementing the intrinsic.</param>
/// <param name="mode">The code-generation mode.</param>
/// <param name="libDeviceRequired">
/// Stored in <see cref="LibDeviceRequired"/>. The backend check of this class
/// compares <c>ptxBackend.NvvmAPI != null</c> against this value, so
/// <c>true</c> accepts only backends with an NVVM API and <c>false</c> accepts
/// only backends without one.
/// </param>
public PTXIntrinsic(
    MethodInfo targetMethod,
    IntrinsicImplementationMode mode,
    bool libDeviceRequired)
    : base(BackendType.PTX, targetMethod, mode)
{
    // No architecture bounds are set by this overload.
    LibDeviceRequired = libDeviceRequired;
}

/// <summary>
/// Constructs a new PTX intrinsic that can handle all architectures.
/// </summary>
Expand Down Expand Up @@ -143,6 +203,32 @@ public PTXIntrinsic(
MaxArchitecture = maxArchitecture;
}

/// <summary>
/// Creates a PTX intrinsic that is resolved from a handler type and method name,
/// limited to a minimum architecture and to a LibDevice availability state.
/// </summary>
/// <param name="handlerType">The type that hosts the handler method.</param>
/// <param name="methodName">The target method name (or null).</param>
/// <param name="mode">The code-generation mode.</param>
/// <param name="minArchitecture">The lowest supported architecture.</param>
/// <param name="libDeviceRequired">
/// Stored in <see cref="LibDeviceRequired"/>. The backend check of this class
/// compares <c>ptxBackend.NvvmAPI != null</c> against this value, so
/// <c>true</c> accepts only backends with an NVVM API and <c>false</c> accepts
/// only backends without one.
/// </param>
public PTXIntrinsic(
    Type handlerType,
    string methodName,
    IntrinsicImplementationMode mode,
    CudaArchitecture minArchitecture,
    bool libDeviceRequired)
    : base(BackendType.PTX, handlerType, methodName, mode)
{
    LibDeviceRequired = libDeviceRequired;
    MinArchitecture = minArchitecture;
}

#endregion

#region Properties
Expand All @@ -164,6 +250,11 @@ public PTXIntrinsic(
/// </remarks>
public CudaArchitecture? MaxArchitecture { get; }

/// <summary>
/// Returns whether LibDevice is required to use this intrinsic.
/// </summary>
/// <remarks>
/// A null value skips the LibDevice part of the backend check. Otherwise the
/// check requires <c>ptxBackend.NvvmAPI != null</c> to equal this value: true
/// accepts only backends with an NVVM API, false only backends without one.
/// </remarks>
public bool? LibDeviceRequired { get; }

#endregion

#region Methods
Expand All @@ -174,7 +265,9 @@ backend is PTXBackend ptxBackend
&& (!MinArchitecture.HasValue ||
ptxBackend.Architecture >= MinArchitecture.Value)
&& (!MaxArchitecture.HasValue ||
ptxBackend.Architecture < MaxArchitecture.Value);
ptxBackend.Architecture < MaxArchitecture.Value)
&& (!LibDeviceRequired.HasValue ||
ptxBackend.NvvmAPI != null == LibDeviceRequired.Value);

#endregion
}
Expand Down
95 changes: 94 additions & 1 deletion Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2016-2021 ILGPU Project
// Copyright (c) 2016-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXIntrinsics.Generated.tt/PTXIntrinsics.Generated.cs
Expand Down Expand Up @@ -35,6 +35,61 @@ var fp16Ops = new (string, string, string, string)[]

("Ternary", "MultiplyAdd", "FmaFP32", "SM_53"),
};

// Unary math functions that are registered in RegisterMathFunctions. Each tuple
// holds (UnaryArithmeticKind member, LibDevice method name, operand type), and
// the full list is repeated once per floating-point type entry.
// NOTE(review): FloatTypes[2] / FloatTypes[1] are presumably the 64-bit and
// 32-bit float types - confirm against the FloatTypes definition.
var unaryMathFunctions = new (string, string, TypeInformation)[]
{
("AcosF", "Acos", FloatTypes[2]),
("AsinF", "Asin", FloatTypes[2]),
("AtanF", "Atan", FloatTypes[2]),
("CeilingF", "Ceil", FloatTypes[2]),
("CosF", "Cos", FloatTypes[2]),
("CoshF", "Cosh", FloatTypes[2]),
("ExpF", "Exp", FloatTypes[2]),
("Exp2F", "Exp2", FloatTypes[2]),
("FloorF", "Floor", FloatTypes[2]),
("LogF", "Log", FloatTypes[2]),
("Log2F", "Log2", FloatTypes[2]),
("Log10F", "Log10", FloatTypes[2]),
("RsqrtF", "Rsqrt", FloatTypes[2]),
("SinF", "Sin", FloatTypes[2]),
("SinhF", "Sinh", FloatTypes[2]),
("SqrtF", "Sqrt", FloatTypes[2]),
("TanF", "Tan", FloatTypes[2]),
("TanhF", "Tanh", FloatTypes[2]),

("AcosF", "Acos", FloatTypes[1]),
("AsinF", "Asin", FloatTypes[1]),
("AtanF", "Atan", FloatTypes[1]),
("CeilingF", "Ceil", FloatTypes[1]),
("CosF", "Cos", FloatTypes[1]),
("CoshF", "Cosh", FloatTypes[1]),
("ExpF", "Exp", FloatTypes[1]),
("Exp2F", "Exp2", FloatTypes[1]),
("FloorF", "Floor", FloatTypes[1]),
("LogF", "Log", FloatTypes[1]),
("Log2F", "Log2", FloatTypes[1]),
("Log10F", "Log10", FloatTypes[1]),
("RsqrtF", "Rsqrt", FloatTypes[1]),
("SinF", "Sin", FloatTypes[1]),
("SinhF", "Sinh", FloatTypes[1]),
("SqrtF", "Sqrt", FloatTypes[1]),
("TanF", "Tan", FloatTypes[1]),
("TanhF", "Tanh", FloatTypes[1]),
};

// Binary math functions that are registered in RegisterMathFunctions. Each tuple
// holds (BinaryArithmeticKind member, method name, base class or null, operand
// type). A null base class resolves the method on LibDevice; otherwise the method
// is resolved on the named class.
// NOTE(review): "Atan2F" is mapped to the method name "Atan" and looked up with
// two parameter types - confirm that LibDevice exposes a two-argument Atan
// overload (rather than Atan2), because the lookup result is passed through
// ThrowIfNull() in CreateLibDeviceMathIntrinsic.
// NOTE(review): the "BinaryLogF" entries expand to
// nameof(IntrinsicMath.BinaryLog.Log) - confirm that IntrinsicMath.BinaryLog is
// a type with a public static two-argument Log method.
var binaryMathFunctions = new (string, string, string, TypeInformation)[]
{
("Atan2F", "Atan", null, FloatTypes[2]),
("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[2]),
("PowF", "Pow", null, FloatTypes[2]),
("Rem", "Fmod", null, FloatTypes[2]),

("Atan2F", "Atan", null, FloatTypes[1]),
("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[1]),
("PowF", "Pow", null, FloatTypes[1]),
("Rem", "Fmod", null, FloatTypes[1]),
};

#>
using ILGPU.IR.Intrinsics;
using ILGPU.IR.Values;
Expand Down Expand Up @@ -157,5 +212,43 @@ namespace ILGPU.Backends.PTX
}

#endregion

#region Math

/// <summary>
/// Registers all Math intrinsics with the given manager.
/// </summary>
/// <remarks>
/// Unary functions and binary functions without a base class are registered via
/// CreateLibDeviceMathIntrinsic. Binary functions that name a base class are
/// registered via CreateMathIntrinsic on that class instead.
/// </remarks>
/// <param name="manager">The target implementation manager.</param>
private static void RegisterMathFunctions(IntrinsicImplementationManager manager)
{
<# foreach (var (kind, methodName, type) in unaryMathFunctions) { #>
manager.RegisterUnaryArithmetic(
UnaryArithmeticKind.<#= kind #>,
BasicValueType.<#= type.GetBasicValueType() #>,
CreateLibDeviceMathIntrinsic(
nameof(LibDevice.<#= methodName #>),
typeof(<#= type.Type #>)));
<# } #>

<# foreach (var (kind, methodName, baseClass, type) in binaryMathFunctions) { #>
manager.RegisterBinaryArithmetic(
BinaryArithmeticKind.<#= kind #>,
BasicValueType.<#= type.GetBasicValueType() #>,
<# if (baseClass == null) { #>
CreateLibDeviceMathIntrinsic(
nameof(LibDevice.<#= methodName #>),
typeof(<#= type.Type #>),
typeof(<#= type.Type #>)));
<# } else { #>
CreateMathIntrinsic(
typeof(<#= baseClass #>),
nameof(<#= baseClass #>.<#= methodName #>),
typeof(<#= type.Type #>),
typeof(<#= type.Type #>)));
<# } #>
<# } #>
}

#endregion
}
}
50 changes: 49 additions & 1 deletion Src/ILGPU/Backends/PTX/PTXIntrinsics.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU
// Copyright (c) 2019-2023 ILGPU Project
// Copyright (c) 2019-2024 ILGPU Project
// www.ilgpu.net
//
// File: PTXIntrinsics.cs
Expand All @@ -13,7 +13,9 @@
using ILGPU.IR.Intrinsics;
using ILGPU.IR.Values;
using ILGPU.Runtime.Cuda;
using ILGPU.Util;
using System;
using System.Reflection;
using System.Runtime.CompilerServices;

namespace ILGPU.Backends.PTX
Expand Down Expand Up @@ -84,6 +86,51 @@ private static PTXIntrinsic CreateFP16Intrinsic(
maxArchitecture.Value)
: new PTXIntrinsic(HalfType, name, IntrinsicImplementationMode.Redirect);

/// <summary>
/// Looks up a public static <see cref="LibDevice"/> method and wraps it in a
/// redirecting PTX intrinsic that is flagged as requiring LibDevice.
/// </summary>
/// <param name="name">The name of the LibDevice method.</param>
/// <param name="types">The parameter types used to select the overload.</param>
/// <returns>The resolved intrinsic representation.</returns>
private static PTXIntrinsic CreateLibDeviceMathIntrinsic(
    string name,
    params Type[] types)
{
    const BindingFlags LookupFlags = BindingFlags.Public | BindingFlags.Static;

    var libDeviceMethod = typeof(LibDevice)
        .GetMethod(name, LookupFlags, null, types, null)
        .ThrowIfNull();

    return new PTXIntrinsic(
        libDeviceMethod,
        IntrinsicImplementationMode.Redirect,
        libDeviceRequired: true);
}

/// <summary>
/// Looks up a public static method on <paramref name="baseType"/> and wraps it
/// in a redirecting PTX intrinsic without a LibDevice requirement.
/// </summary>
/// <param name="baseType">The type that declares the target method.</param>
/// <param name="name">The name of the target method.</param>
/// <param name="types">The parameter types used to select the overload.</param>
/// <returns>The resolved intrinsic representation.</returns>
private static PTXIntrinsic CreateMathIntrinsic(
    Type baseType,
    string name,
    params Type[] types)
{
    const BindingFlags LookupFlags = BindingFlags.Public | BindingFlags.Static;

    var redirectTarget = baseType
        .GetMethod(name, LookupFlags, null, types, null)
        .ThrowIfNull();

    return new PTXIntrinsic(redirectTarget, IntrinsicImplementationMode.Redirect);
}

/// <summary>
/// Registers all PTX intrinsics with the given manager.
/// </summary>
Expand All @@ -95,6 +142,7 @@ public static void Register(IntrinsicImplementationManager manager)
RegisterWarpShuffles(manager);
RegisterFP16(manager);
RegisterBitFunctions(manager);
RegisterMathFunctions(manager);
}

#endregion
Expand Down
Loading
Loading