Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build DML in Windows GPU CI pipeline #22869

Draft
wants to merge 26 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
a73e73c
ORT DML CUDA
invalid-email-address Nov 1, 2024
f7fa408
typo
invalid-email-address Nov 1, 2024
7206749
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
invalid-email-address Nov 4, 2024
f906347
gpu and dml test
invalid-email-address Nov 15, 2024
625534e
ignore tests in cuda dir
invalid-email-address Nov 16, 2024
e8232f6
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
invalid-email-address Nov 16, 2024
1ce2fd1
update and merge
invalid-email-address Nov 16, 2024
73ed01a
typo
invalid-email-address Nov 16, 2024
62d4263
update
invalid-email-address Nov 16, 2024
851218d
8G A10
invalid-email-address Nov 16, 2024
e4dd41a
update1
invalid-email-address Nov 16, 2024
062206c
check
invalid-email-address Nov 16, 2024
8bf7ad6
disable cudannhwc only
invalid-email-address Nov 17, 2024
07bbfe6
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
invalid-email-address Nov 18, 2024
76481a4
merge
invalid-email-address Nov 18, 2024
1e47b8f
ignore cpu model tests
invalid-email-address Nov 18, 2024
cf3ecaa
update
invalid-email-address Nov 18, 2024
6bc3bbd
update filter
invalid-email-address Nov 18, 2024
ea47c68
update filter
invalid-email-address Nov 18, 2024
2ad1a36
add enable_wcos
invalid-email-address Nov 18, 2024
a46265d
update
invalid-email-address Nov 18, 2024
4e4e052
add path env
invalid-email-address Nov 21, 2024
5f6c2ba
add new stage for cuda+dml
invalid-email-address Nov 22, 2024
a10bbd5
add new stage for cuda+dml
invalid-email-address Nov 22, 2024
96bbd2d
update
invalid-email-address Nov 22, 2024
2872273
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
invalid-email-address Nov 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;

public class ProviderOptionsTest {
private static final OrtEnvironment env = TestHelpers.getOrtEnvironment();

@Test
@EnabledIfSystemProperty(named = "USE_CUDA", matches = "1")
@DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1")
public void testCUDAOptions() throws OrtException {
// Test standard options
OrtCUDAProviderOptions cudaOpts = new OrtCUDAProviderOptions(0);
Expand Down Expand Up @@ -61,6 +63,7 @@ public void testCUDAOptions() throws OrtException {

@Test
@EnabledIfSystemProperty(named = "USE_TENSORRT", matches = "1")
@DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1")
public void testTensorRT() throws OrtException {
// Test standard options
OrtTensorRTProviderOptions rtOpts = new OrtTensorRTProviderOptions(0);
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/cuda/cuda_provider_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ProviderInfo_CUDA& GetProviderInfo_CUDA_Test();

namespace test {
namespace cuda {
TEST(CUDA_EP_Unittest, All) {
TEST(CudaEpUnittest, All) {
onnxruntime::ProviderInfo_CUDA& ep = onnxruntime::GetProviderInfo_CUDA_Test();
ep.TestAll();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
namespace onnxruntime {
namespace test {

TEST(AllocatorTest, CUDAAllocatorTest) {
TEST(CudaEpAllocatorTest, CUDAAllocatorTest) {
OrtDevice::DeviceId cuda_device_id = 0;

// ensure CUDA device is available.
Expand Down Expand Up @@ -77,7 +77,7 @@ TEST(AllocatorTest, CUDAAllocatorTest) {
}

// test that we fall back to smaller allocations if the growth of the arena exceeds the available memory
TEST(AllocatorTest, CUDAAllocatorFallbackTest) {
TEST(CudaEpAllocatorTest, CUDAAllocatorFallbackTest) {
OrtDevice::DeviceId cuda_device_id = 0;

size_t free = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ using onnxruntime::contrib::attention::AttentionBackend;
namespace onnxruntime {
namespace test {

TEST(AttentionKernelOptionsTest, NonZeroValue) {
TEST(CudaEpAttentionKernelOptionsTest, NonZeroValue) {
{
AttentionKernelOptions options;
int value = static_cast<int>(AttentionBackend::FLASH_ATTENTION) | static_cast<int>(AttentionBackend::EFFICIENT_ATTENTION);
Expand Down Expand Up @@ -156,7 +156,7 @@ TEST(AttentionKernelOptionsTest, NonZeroValue) {
}

// Test all environment variables take effect when option value is 0.
TEST(AttentionKernelOptionsTest, DefaultOptionWithEnvVar) {
TEST(CudaEpAttentionKernelOptionsTest, DefaultOptionWithEnvVar) {
constexpr int value = 0;
ScopedEnvironmentVariables scoped_env_vars{
EnvVarMap{
Expand Down Expand Up @@ -186,7 +186,7 @@ TEST(AttentionKernelOptionsTest, DefaultOptionWithEnvVar) {
}

// Test default min sequence lengths when environment variables are not set.
TEST(AttentionKernelOptionsTest, DefaultMinSeqLens) {
TEST(CudaEpAttentionKernelOptionsTest, DefaultMinSeqLens) {
constexpr int value = 0;
ScopedEnvironmentVariables scoped_env_vars{
EnvVarMap{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ void ComputeTopKReference(const std::vector<float>& values,
}
}

TEST(TestBeamSearch, TopK) {
TEST(CudaEpTestBeamSearch, TopK) {
int32_t batch_size = 4;
int32_t beam_size = 4;
int32_t vocab_size = 50257;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ void testPrepack(int rows, int columns) {
}

// TODO: this code runs on the CPU but targets sm80 only; consider enabling it only when testing on sm80
TEST(BlkQ4_GEMM, PrepackSm80Test) {
TEST(CudaEpBlkQ4_GEMM, PrepackSm80Test) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand Down Expand Up @@ -263,7 +263,7 @@ TEST(BlkQ4_GEMM, PrepackSm80Test) {
testPrepack<true, false>(256, 256);
}

TEST(BlkQ4_GEMM, Sm80RowBlockingTest) {
TEST(CudaEpBlkQ4_GEMM, Sm80RowBlockingTest) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand Down Expand Up @@ -292,7 +292,7 @@ TEST(BlkQ4_GEMM, Sm80RowBlockingTest) {
onnxruntime::cuda::test::run_blkq4_gemm<64, false, false, true>(256, 1024, 576);
}

TEST(BlkQ4_GEMM, Sm80ColBlockingTest) {
TEST(CudaEpBlkQ4_GEMM, Sm80ColBlockingTest) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand All @@ -305,7 +305,7 @@ TEST(BlkQ4_GEMM, Sm80ColBlockingTest) {
onnxruntime::cuda::test::run_blkq4_gemm<64, true, false, true>(256, 1024, 576);
}

TEST(BlkQ4_GEMM, Sm80SmallMTest) {
TEST(CudaEpBlkQ4_GEMM, Sm80SmallMTest) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand All @@ -326,7 +326,7 @@ TEST(BlkQ4_GEMM, Sm80SmallMTest) {
onnxruntime::cuda::test::run_blkq4_gemm<64, true, true, true>(16, 1024, 576);
}

TEST(BlkQ4_GEMM, Sm80SmallTileKernelTest) {
TEST(CudaEpBlkQ4_GEMM, Sm80SmallTileKernelTest) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace cuda {
namespace test {
// TODO: Since the "DeferredRelease" has been migrated to CudaStream class,
// we should migrate this test from CudaEP unit test to CudaStream unit test.
TEST(TestDeferredRelease, WithArena) {
TEST(CudaEpTestDeferredRelease, WithArena) {
// Create CUDA EP.
CUDAExecutionProviderInfo info;
CUDAExecutionProvider ep(info);
Expand Down Expand Up @@ -52,7 +52,7 @@ TEST(TestDeferredRelease, WithArena) {
ORT_THROW_IF_ERROR(ep.OnRunEnd(true, run_opts));
}

TEST(TestDeferredRelease, WithoutArena) {
TEST(CudaEpTestDeferredRelease, WithoutArena) {
// Create CUDA EP.
CUDAExecutionProviderInfo info;
CUDAExecutionProvider ep(info);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void TestFillCorrectness(size_t num_elements, TElement value) {
}
} // namespace

TEST(CudaUtilsTest, FillCorrectness) {
TEST(CudaEpUnittest, FillCorrectness) {
TestFillCorrectness<int8_t>(1 << 20, 1);
TestFillCorrectness<int16_t>(1 << 20, 2);
TestFillCorrectness<int32_t>(1 << 20, 3);
Expand Down
12 changes: 6 additions & 6 deletions onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace onnxruntime {
namespace cuda {
namespace test {

TEST(CudaGemmOptions, TestDefaultOptions) {
TEST(CudaEpGemmOptions, TestDefaultOptions) {
HalfGemmOptions gemm_options;
ASSERT_FALSE(gemm_options.IsCompute16F());
#if defined(USE_CUDA)
Expand All @@ -22,7 +22,7 @@ TEST(CudaGemmOptions, TestDefaultOptions) {
#endif
}

TEST(CudaGemmOptions, TestCompute16F) {
TEST(CudaEpGemmOptions, TestCompute16F) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(1);
ASSERT_TRUE(gemm_options.IsCompute16F());
Expand All @@ -35,7 +35,7 @@ TEST(CudaGemmOptions, TestCompute16F) {
#endif
}

TEST(CudaGemmOptions, NoReducedPrecision) {
TEST(CudaEpGemmOptions, NoReducedPrecision) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(2);
ASSERT_FALSE(gemm_options.IsCompute16F());
Expand All @@ -48,7 +48,7 @@ TEST(CudaGemmOptions, NoReducedPrecision) {
#endif
}

TEST(CudaGemmOptions, Pedantic) {
TEST(CudaEpGemmOptions, Pedantic) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(4);
ASSERT_FALSE(gemm_options.IsCompute16F());
Expand All @@ -61,7 +61,7 @@ TEST(CudaGemmOptions, Pedantic) {
#endif
}

TEST(CudaGemmOptions, Compute16F_Pedantic) {
TEST(CudaEpGemmOptions, Compute16F_Pedantic) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(5);
ASSERT_TRUE(gemm_options.IsCompute16F());
Expand All @@ -74,7 +74,7 @@ TEST(CudaGemmOptions, Compute16F_Pedantic) {
#endif
}

TEST(CudaGemmOptions, Compute16F_NoReducedPrecision) {
TEST(CudaEpGemmOptions, Compute16F_NoReducedPrecision) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(3);
ASSERT_TRUE(gemm_options.IsCompute16F());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ void ComputeTop1Reference(const std::vector<float>& values,
}
}

TEST(TestGreedySearch, TopOne) {
TEST(CudaEpTestGreedySearch, TopOne) {
int32_t batch_size = 4;
int32_t vocab_size = 50257;
int32_t batch_x_vocab = batch_size * vocab_size;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ void TestReduceColumnsToColumn(int m, int n, float relative_error_tolerance = 1e
}
} // namespace

TEST(ReductionFunctionsTest, ReduceRowToScalar) {
TEST(CudaEpReductionFunctionsTest, ReduceRowToScalar) {
TestReduceRowToScalarApis(3);
TestReduceRowToScalarApis(19);
TestReduceRowToScalarApis(123);
Expand All @@ -188,7 +188,7 @@ TEST(ReductionFunctionsTest, ReduceRowToScalar) {
TestReduceRowToScalarApis(941736, 2e-4f);
}

TEST(ReductionFunctionsTest, ReduceRowsToRow) {
TEST(CudaEpReductionFunctionsTest, ReduceRowsToRow) {
for (int m : {3, 193, 2945}) {
for (int n : {3, 193, 2945}) {
TestReduceRowsToRow(m, n, true);
Expand All @@ -197,15 +197,15 @@ TEST(ReductionFunctionsTest, ReduceRowsToRow) {
}
}

TEST(ReductionFunctionsTest, ReduceColumnsToColumn) {
TEST(CudaEpReductionFunctionsTest, ReduceColumnsToColumn) {
for (int m : {3, 193, 2945}) {
for (int n : {3, 193, 2945}) {
TestReduceColumnsToColumn(m, n);
}
}
}

TEST(ReductionFunctionsTest, BufferOffsets) {
TEST(CudaEpReductionFunctionsTest, BufferOffsets) {
const int m = 2048;
const int n = 1024;
const TensorShape shape{m, n};
Expand Down Expand Up @@ -240,7 +240,7 @@ TEST(ReductionFunctionsTest, BufferOffsets) {
}
}

TEST(ReductionFunctionsTest, InvalidBufferSize) {
TEST(CudaEpReductionFunctionsTest, InvalidBufferSize) {
const int m = 2048;
const int n = 1024;
const TensorShape shape{m, n};
Expand All @@ -262,7 +262,7 @@ TEST(ReductionFunctionsTest, InvalidBufferSize) {
ASSERT_FALSE(status.IsOK());
}

TEST(ReductionFunctionsTest, GetApplicableMatrixReduction) {
TEST(CudaEpReductionFunctionsTest, GetApplicableMatrixReduction) {
auto test_get_applicable_matrix_reduction =
[](cudnnReduceTensorOp_t cudnn_op,
const std::vector<int64_t>& dims, const std::vector<int64_t>& axes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,16 +218,32 @@ jobs:
- powershell: |
python3 -m pip uninstall -y onnxruntime onnxruntime-gpu onnxruntime-training onnxruntime-directml -qq
Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname}

workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
displayName: 'Install onnxruntime wheel'

- ${{ if eq(parameters.RunOnnxRuntimeTests, true) }}:
- powershell: |
python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_shared_lib --enable_onnx_tests ${{ parameters.additionalBuildFlags }}

workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
displayName: 'Run tests'
- ${{ if and(contains(parameters.additionalBuildFlags, 'use_cuda'), contains(parameters.additionalBuildFlags, 'use_dml')) }}:
- powershell: |
python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --enable_onnx_tests ${{ parameters.additionalBuildFlags }}
workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
displayName: 'Run tests excluding CUDA tests'
env:
NO_CUDA_TEST: '1'
GTEST_FILTER: '-CudaEp*:CudaNhwcTypedTest*:*cpu_*models*' # Exclude CUDA EP tests under providers/cuda/ and cpu models test
PATH: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }};$(PATH)'
- powershell: |
python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --enable_onnx_tests ${{ parameters.additionalBuildFlags }}
workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
displayName: 'Run tests excluding DML tests'
env:
NO_DMLTEST: '1'
GTEST_FILTER: '-*cpu_*models*'
PATH: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }};$(PATH)'
- ${{ else }}:
- powershell: |
python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --enable_onnx_tests ${{ parameters.additionalBuildFlags }}
workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
displayName: 'Run tests'

- ${{ if eq(parameters.GenerateDocumentation, true) }}:
- task: PythonScript@0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ stages:
workingDirectory: '$(Build.BinariesDirectory)'
env:
NO_CUDA_TEST: '1'
GTEST_FILTER: -*CudaNhwcTypedTest*
GTEST_FILTER: '-CudaEp*:CudaNhwcTypedTest*' # Exclude CUDA EP tests under providers/cuda/
- task: PythonScript@0
displayName: 'test excludes DML'
condition: and(succeeded(), eq('${{ parameters.runTests}}', true))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,27 @@ stages:
RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
ORT_EP_NAME: CUDA
WITH_CACHE: true
MachinePool: onnxruntime-Win2022-GPU-A10
MachinePool: onnxruntime-Win2022-GPU-A10

- stage: cuda_dml
dependsOn: []
jobs:
- template: templates/jobs/win-ci-vs-2022-job.yml
parameters:
BuildConfig: 'RelWithDebInfo'
EnvSetupScript: setup_env_cuda.bat
buildArch: x64
additionalBuildFlags: >-
--build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}"
--enable_cuda_profiling --enable_transformers_tool_test
--use_dml
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
--cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=ON
--cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON
msbuildPlatform: x64
isX86: false
job_name_suffix: x64_RelWithDebInfo
RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
ORT_EP_NAME: CUDA
WITH_CACHE: true
MachinePool: onnxruntime-Win2022-GPU-A10
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ stages:
BuildConfig: 'RelWithDebInfo'
EnvSetupScript: setup_env.bat
buildArch: x64
additionalBuildFlags: --enable_pybind --use_dml --enable_wcos --use_winml
additionalBuildFlags: --enable_pybind --use_dml --enable_wcos --use_winml
msbuildPlatform: x64
isX86: false
job_name_suffix: x64_RelWithDebInfo
RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
ORT_EP_NAME: DML
WITH_CACHE: false
MachinePool: onnxruntime-Win2022-GPU-dml-A10
MachinePool: onnxruntime-Win2022-GPU-dml-A10