Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more cudnn operations #284

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
136 changes: 136 additions & 0 deletions src/cudnn/result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,51 @@ pub unsafe fn convolution_forward(
.result()
}

/// Fused convolution + bias + activation forward:
/// `y = act(alpha1 * conv(x, w) + alpha2 * z + bias)` (equation as documented on the
/// safe wrapper `ConvBiasActivationForward`).
///
/// See [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnConvolutionBiasActivationForward)
///
/// # Safety
/// - `handle` and all descriptors must be valid, initialized cuDNN objects.
/// - `x`, `w`, `z`, `bias` and `y` must point to device memory matching the
///   layout/data type described by their corresponding descriptors.
/// - `alpha1`/`alpha2` must point to host scaling values of the type cuDNN
///   expects for the tensor data type.
/// - `work_space` must be valid for `work_space_size_in_bytes` bytes.
#[allow(clippy::too_many_arguments)]
pub unsafe fn convolution_bias_activation_forward(
    handle: sys::cudnnHandle_t,
    alpha1: *const ::core::ffi::c_void,
    x_desc: sys::cudnnTensorDescriptor_t,
    x: *const ::core::ffi::c_void,
    w_desc: sys::cudnnFilterDescriptor_t,
    w: *const ::core::ffi::c_void,
    conv_desc: sys::cudnnConvolutionDescriptor_t,
    algo: sys::cudnnConvolutionFwdAlgo_t,
    work_space: *mut ::core::ffi::c_void,
    work_space_size_in_bytes: usize,
    alpha2: *const ::core::ffi::c_void,
    z_desc: sys::cudnnTensorDescriptor_t,
    z: *const ::core::ffi::c_void,
    bias_desc: sys::cudnnTensorDescriptor_t,
    bias: *const ::core::ffi::c_void,
    activation_desc: sys::cudnnActivationDescriptor_t,
    y_desc: sys::cudnnTensorDescriptor_t,
    y: *mut ::core::ffi::c_void,
) -> Result<(), CudnnError> {
    lib()
        .cudnnConvolutionBiasActivationForward(
            handle,
            alpha1,
            x_desc,
            x,
            w_desc,
            w,
            conv_desc,
            algo,
            work_space,
            work_space_size_in_bytes,
            alpha2,
            z_desc,
            z,
            bias_desc,
            bias,
            activation_desc,
            y_desc,
            y,
        )
        .result()
}

/// See [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnGetConvolutionBackwardDataAlgorithm_v7)
///
/// # Safety
Expand Down Expand Up @@ -779,3 +824,94 @@ pub unsafe fn reduce_tensor(
)
.result()
}

/// Allocates a new, unconfigured pooling descriptor.
///
/// See [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnCreatePoolingDescriptor)
pub fn create_pooling_descriptor() -> Result<sys::cudnnPoolingDescriptor_t, CudnnError> {
    let mut descriptor = MaybeUninit::uninit();
    // SAFETY: the FFI call writes a descriptor handle through the pointer;
    // `result()?` returns early on failure, so `assume_init` only runs after
    // cuDNN reports success and has initialized `descriptor`.
    unsafe { lib().cudnnCreatePoolingDescriptor(descriptor.as_mut_ptr()) }.result()?;
    Ok(unsafe { descriptor.assume_init() })
}

/// Configures an Nd pooling descriptor.
///
/// See [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetPoolingNdDescriptor)
///
/// # Panics
/// Panics when `nb_dims` is negative, or when `window_dims`, `pads`, or
/// `strides` contains fewer than `nb_dims` elements. cuDNN reads `nb_dims`
/// entries from each array, so without these checks this safe function
/// could cause out-of-bounds reads.
pub fn set_pooling_descriptor(
    desc: sys::cudnnPoolingDescriptor_t,
    mode: sys::cudnnPoolingMode_t,
    nan_propagation: sys::cudnnNanPropagation_t,
    nb_dims: std::ffi::c_int,
    window_dims: &[std::ffi::c_int],
    pads: &[std::ffi::c_int],
    strides: &[std::ffi::c_int],
) -> Result<(), CudnnError> {
    // Validate slice lengths up front: the FFI call below reads `nb_dims`
    // elements from each pointer, and this function is safe to call.
    let n = usize::try_from(nb_dims).expect("nb_dims must be non-negative");
    assert!(
        window_dims.len() >= n,
        "window_dims must contain at least nb_dims elements"
    );
    assert!(pads.len() >= n, "pads must contain at least nb_dims elements");
    assert!(
        strides.len() >= n,
        "strides must contain at least nb_dims elements"
    );
    unsafe {
        lib()
            .cudnnSetPoolingNdDescriptor(
                desc,
                mode,
                nan_propagation,
                nb_dims,
                window_dims.as_ptr(),
                pads.as_ptr(),
                strides.as_ptr(),
            )
            .result()
    }
}

/// Performs the pooling forward operation.
///
/// See [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnPoolingForward)
///
/// # Safety
/// - `handle` and both descriptors must be valid, initialized cuDNN objects.
/// - `x` and `y` must point to device memory matching the layout/data type
///   described by `x_desc`/`y_desc`.
/// - `alpha`/`beta` must point to host scaling values of the type cuDNN expects.
///
/// Marked `unsafe` for consistency with the other pointer-taking wrappers in
/// this module (e.g. `convolution_forward`): the raw pointers cannot be
/// validated here, so a safe signature would be unsound.
pub unsafe fn pooling_forward(
    handle: sys::cudnnHandle_t,
    pooling_desc: sys::cudnnPoolingDescriptor_t,
    alpha: *const ::core::ffi::c_void,
    x_desc: sys::cudnnTensorDescriptor_t,
    x: *const ::core::ffi::c_void,
    beta: *const ::core::ffi::c_void,
    y_desc: sys::cudnnTensorDescriptor_t,
    y: *mut ::core::ffi::c_void,
) -> Result<(), CudnnError> {
    lib()
        .cudnnPoolingForward(handle, pooling_desc, alpha, x_desc, x, beta, y_desc, y)
        .result()
}

/// Allocates a new, unconfigured activation descriptor.
///
/// See [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnCreateActivationDescriptor)
pub fn create_activation_descriptor() -> Result<sys::cudnnActivationDescriptor_t, CudnnError> {
    let mut descriptor = MaybeUninit::uninit();
    // SAFETY: the FFI call writes a descriptor handle through the pointer;
    // `result()?` returns early on failure, so `assume_init` only runs after
    // cuDNN reports success and has initialized `descriptor`.
    unsafe { lib().cudnnCreateActivationDescriptor(descriptor.as_mut_ptr()) }.result()?;
    Ok(unsafe { descriptor.assume_init() })
}

/// Configures an activation descriptor with the given mode, NaN-propagation
/// policy, and coefficient.
///
/// See [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetActivationDescriptor)
pub fn set_activation_descriptor(
    desc: sys::cudnnActivationDescriptor_t,
    mode: sys::cudnnActivationMode_t,
    nan_propagation: sys::cudnnNanPropagation_t,
    coef: f64,
) -> Result<(), CudnnError> {
    // All arguments are passed by value, so only the FFI call itself
    // needs the unsafe block; converting the status to a Result is safe.
    let status = unsafe { lib().cudnnSetActivationDescriptor(desc, mode, nan_propagation, coef) };
    status.result()
}

/// Performs the activation forward operation.
///
/// See [nvidia docs](https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnActivationForward)
///
/// # Safety
/// - `handle` and both descriptors must be valid, initialized cuDNN objects.
/// - `x` and `y` must point to device memory matching the layout/data type
///   described by `x_desc`/`y_desc`.
/// - `alpha`/`beta` must point to host scaling values of the type cuDNN expects.
///
/// Marked `unsafe` for consistency with the other pointer-taking wrappers in
/// this module (e.g. `convolution_forward`): the raw pointers cannot be
/// validated here, so a safe signature would be unsound.
pub unsafe fn activation_forward(
    handle: sys::cudnnHandle_t,
    activation_desc: sys::cudnnActivationDescriptor_t,
    alpha: *const ::core::ffi::c_void,
    x_desc: sys::cudnnTensorDescriptor_t,
    x: *const ::core::ffi::c_void,
    beta: *const ::core::ffi::c_void,
    y_desc: sys::cudnnTensorDescriptor_t,
    y: *mut ::core::ffi::c_void,
) -> Result<(), CudnnError> {
    lib()
        .cudnnActivationForward(handle, activation_desc, alpha, x_desc, x, beta, y_desc, y)
        .result()
}
79 changes: 79 additions & 0 deletions src/cudnn/safe/activation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
use crate::cudnn::{result, sys, Cudnn, CudnnDataType, CudnnError, TensorDescriptor};
use crate::driver::{DevicePtr, DevicePtrMut};
use core::marker::PhantomData;
use std::sync::Arc;

/// A descriptor of the activation operation. Create with [`Cudnn::create_activation()`]
///
/// NOTE(review): no `Drop` impl is visible in this file, so the underlying
/// `cudnnActivationDescriptor_t` is apparently never destroyed — confirm
/// whether a `Drop` calling `cudnnDestroyActivationDescriptor` exists
/// elsewhere or should be added, matching the other descriptor wrappers.
#[derive(Debug)]
pub struct ActivationDescriptor<T> {
    // Raw cuDNN handle for this activation descriptor.
    pub(crate) desc: sys::cudnnActivationDescriptor_t,
    // Keeps the owning `Cudnn` handle alive at least as long as this descriptor.
    #[allow(unused)]
    pub(crate) handle: Arc<Cudnn>,
    // Ties the descriptor to data type `T` without storing a value of it.
    pub(crate) marker: PhantomData<T>,
}

impl Cudnn {
    /// Creates an [`ActivationDescriptor`] configured with the given
    /// activation `mode`, NaN-propagation policy, and `coef` (the
    /// clipping/ELU coefficient cuDNN associates with some modes).
    pub fn create_activation<T: CudnnDataType>(
        self: &Arc<Cudnn>,
        mode: sys::cudnnActivationMode_t,
        nan_propagation: sys::cudnnNanPropagation_t,
        coef: f64,
    ) -> Result<ActivationDescriptor<T>, CudnnError> {
        // Wrap the raw handle first so it is owned before configuration.
        let activation = ActivationDescriptor {
            desc: result::create_activation_descriptor()?,
            handle: self.clone(),
            marker: PhantomData,
        };
        result::set_activation_descriptor(activation.desc, mode, nan_propagation, coef)?;
        Ok(activation)
    }
}

/// The activation forward operation. Pass in references to descriptors
/// directly, and then call [`ActivationForward::launch()`].
pub struct ActivationForward<'a, A: CudnnDataType, X: CudnnDataType, Y: CudnnDataType> {
    /// Activation function.
    pub act: &'a ActivationDescriptor<A>,
    /// Input tensor descriptor.
    pub x: &'a TensorDescriptor<X>,
    /// Output tensor descriptor.
    pub y: &'a TensorDescriptor<Y>,
}

impl<'a, A, X, Y> ActivationForward<'a, A, X, Y>
where
    A: CudnnDataType,
    X: CudnnDataType,
    Y: CudnnDataType,
{
    /// Launches the operation.
    ///
    /// - `x` is the input tensor
    /// - `y` is the output
    ///
    /// # Safety
    /// The arguments must match the data type/layout specified in the
    /// descriptors in `self`.
    pub unsafe fn launch<Src, Dst>(
        &self,
        (alpha, beta): (Y, Y),
        x: &Src,
        y: &mut Dst,
    ) -> Result<(), CudnnError>
    where
        // The buffers must match the descriptors: `x` pairs with
        // `TensorDescriptor<X>` and `y` with `TensorDescriptor<Y>` (the
        // previous `DevicePtr<A>`/`DevicePtrMut<A>` bounds wrongly tied both
        // buffers to the activation's type parameter).
        Src: DevicePtr<X>,
        Dst: DevicePtrMut<Y>,
    {
        // Convert scaling factors into the scalar type cuDNN expects for `Y`.
        let alpha = alpha.into_scaling_parameter();
        let beta = beta.into_scaling_parameter();
        result::activation_forward(
            self.act.handle.handle,
            self.act.desc,
            (&alpha) as *const Y::Scalar as *const std::ffi::c_void,
            self.x.desc,
            *x.device_ptr() as *const X as *const std::ffi::c_void,
            (&beta) as *const Y::Scalar as *const std::ffi::c_void,
            self.y.desc,
            *y.device_ptr_mut() as *mut Y as *mut std::ffi::c_void,
        )
    }
}
121 changes: 121 additions & 0 deletions src/cudnn/safe/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::{
driver::{DevicePtr, DevicePtrMut},
};

use crate::cudnn::safe::activation::ActivationDescriptor;
use std::{marker::PhantomData, sync::Arc};

/// A descriptor of the filters for conv operation. Create with [`Cudnn::create_4d_filter()`]
Expand Down Expand Up @@ -525,3 +526,123 @@ impl<'a, X: CudnnDataType, C: CudnnDataType, Y: CudnnDataType> ConvBackwardFilte
)
}
}

/// The bias + convolution + activation forward operation.
/// The full computation follows the equation `y = act (alpha1 * conv(x) + alpha2 * z + bias)`.
/// Pass in references to descriptors directly, and then call:
/// 1. [`ConvBiasActivationForward::pick_algorithm()`] to use cudnn heuristics to select the algorithm
/// 2. [`ConvBiasActivationForward::get_workspace_size()`] to get required workspace size.
/// 3. [`ConvBiasActivationForward::launch()`] to execute it
#[derive(Debug)]
pub struct ConvBiasActivationForward<
    'a,
    X: CudnnDataType,
    C: CudnnDataType,
    A: CudnnDataType,
    Y: CudnnDataType,
> {
    /// Conv parameters.
    pub conv: &'a ConvDescriptor<C>,
    /// Activation function.
    pub act: &'a ActivationDescriptor<A>,
    /// Input tensor descriptor
    pub x: &'a TensorDescriptor<X>,
    /// Filter descriptor
    pub w: &'a FilterDescriptor<X>,
    /// Z descriptor
    pub z: &'a TensorDescriptor<X>,
    /// Bias descriptor
    pub bias: &'a TensorDescriptor<X>,
    /// Output tensor descriptor
    pub y: &'a TensorDescriptor<Y>,
}

impl<'a, X, C, A, Y> ConvBiasActivationForward<'a, X, C, A, Y>
where
    X: CudnnDataType,
    C: CudnnDataType,
    A: CudnnDataType,
    Y: CudnnDataType,
{
    /// Picks the fastest algorithm from all available cuDNN algorithms based on cudnn heuristics.
    ///
    /// Delegates to [`ConvForward::pick_algorithm`] on the plain-convolution
    /// part of this operation (the activation/bias do not affect algorithm choice here).
    pub fn pick_algorithm(&self) -> Result<sys::cudnnConvolutionFwdAlgo_t, CudnnError> {
        let conv = ConvForward {
            conv: self.conv,
            x: self.x,
            w: self.w,
            y: self.y,
        };
        conv.pick_algorithm()
    }

    /// Returns size in **bytes** to execute the selected algorithm.
    ///
    /// Delegates to [`ConvForward::get_workspace_size`] on the plain-convolution
    /// part of this operation.
    pub fn get_workspace_size(
        &self,
        algo: sys::cudnnConvolutionFwdAlgo_t,
    ) -> Result<usize, CudnnError> {
        let conv = ConvForward {
            conv: self.conv,
            x: self.x,
            w: self.w,
            y: self.y,
        };
        conv.get_workspace_size(algo)
    }

    /// Launches the operation.
    ///
    /// - `src` is the input tensor
    /// - `filter` is the convolution kernels
    /// - `z` is the residual input added with scale `alpha2`
    /// - `bias` is the bias tensor
    /// - `y` is the output
    ///
    /// # Safety
    /// The src/filter/z/bias/y arguments must match the data type/layout specified in the
    /// descriptors in `self`.
    pub unsafe fn launch<Workspace, Src, Filter, Dst>(
        &self,
        algo: sys::cudnnConvolutionFwdAlgo_t,
        workspace: Option<&mut Workspace>,
        (alpha1, alpha2): (Y, Y),
        src: &Src,
        filter: &Filter,
        z: &Src,
        bias: &Src,
        y: &mut Dst,
    ) -> Result<(), CudnnError>
    where
        Workspace: DevicePtrMut<u8>,
        Src: DevicePtr<X>,
        Filter: DevicePtr<X>,
        Dst: DevicePtrMut<Y>,
    {
        // A workspace is only required by some algorithms; pass a null
        // pointer and zero size when the caller supplies none.
        let (num_bytes, workspace_ptr) = match workspace {
            Some(w) => (
                w.num_bytes(),
                *w.device_ptr_mut() as *mut u8 as *mut std::ffi::c_void,
            ),
            None => (0, std::ptr::null_mut()),
        };
        // Convert scaling factors into the scalar type cuDNN expects for `Y`.
        let alpha1 = alpha1.into_scaling_parameter();
        let alpha2 = alpha2.into_scaling_parameter();
        result::convolution_bias_activation_forward(
            self.conv.handle.handle,
            (&alpha1) as *const Y::Scalar as *const std::ffi::c_void,
            self.x.desc,
            *src.device_ptr() as *const X as *const std::ffi::c_void,
            self.w.desc,
            *filter.device_ptr() as *const X as *const std::ffi::c_void,
            self.conv.desc,
            algo,
            workspace_ptr,
            num_bytes,
            (&alpha2) as *const Y::Scalar as *const std::ffi::c_void,
            self.z.desc,
            *z.device_ptr() as *const X as *const std::ffi::c_void,
            self.bias.desc,
            *bias.device_ptr() as *const X as *const std::ffi::c_void,
            self.act.desc,
            self.y.desc,
            *y.device_ptr_mut() as *mut Y as *mut std::ffi::c_void,
        )
    }
}
Loading