Skip to content

Commit 4886636

Browse files
authored
Queue up 'skip reasons' (#1352)
1 parent 0eeabc1 commit 4886636

File tree

5 files changed

+88
-72
lines changed

5 files changed

+88
-72
lines changed

cuda_bindings/tests/nvml/conftest.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,9 @@ def get_devices(device_info):
7171

7272

7373
@pytest.fixture
74-
def for_all_devices(device_info):
74+
def all_devices(device_info):
7575
with NVMLInitializer():
76-
unique_devices = set()
77-
for device_id in get_devices(device_info):
78-
if device_id not in unique_devices:
79-
unique_devices.add(device_id)
80-
yield device_id
81-
# RestoreDefaultEnvironment.restore()
76+
yield sorted(list(set(get_devices(device_info))))
8277

8378

8479
@pytest.fixture

cuda_bindings/tests/nvml/test_compute_mode.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,19 @@
1515

1616

1717
@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
18-
def test_compute_mode_supported_nonroot(for_all_devices):
19-
device = for_all_devices
20-
21-
try:
22-
original_compute_mode = nvml.device_get_compute_mode(device)
23-
except nvml.NotSupportedError:
24-
pytest.skip("nvmlDeviceGetComputeMode not supported")
25-
26-
for cm in COMPUTE_MODES:
27-
with pytest.raises(nvml.NoPermissionError):
28-
nvml.device_set_compute_mode(device, cm)
29-
assert original_compute_mode == nvml.device_get_compute_mode(device), "Compute mode shouldn't have changed"
18+
def test_compute_mode_supported_nonroot(all_devices):
19+
skip_reasons = set()
20+
for device in all_devices:
21+
try:
22+
original_compute_mode = nvml.device_get_compute_mode(device)
23+
except nvml.NotSupportedError:
24+
skip_reasons.add(f"nvmlDeviceGetComputeMode not supported for device {device}")
25+
continue
26+
27+
for cm in COMPUTE_MODES:
28+
with pytest.raises(nvml.NoPermissionError):
29+
nvml.device_set_compute_mode(device, cm)
30+
assert original_compute_mode == nvml.device_get_compute_mode(device), "Compute mode shouldn't have changed"
31+
32+
if skip_reasons:
33+
pytest.skip(" ; ".join(skip_reasons))

cuda_bindings/tests/nvml/test_gpu.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,24 @@ def test_gpu_get_module_id(nvml_init):
2222
assert isinstance(module_id, int)
2323

2424

25-
def test_gpu_get_platform_info(for_all_devices):
26-
device = for_all_devices
25+
def test_gpu_get_platform_info(all_devices):
26+
skip_reasons = set()
27+
for device in all_devices:
28+
if util.is_vgpu(device):
29+
skip_reasons.add(f"Not supported on vGPU device {device}")
30+
continue
2731

28-
if util.is_vgpu(device):
29-
pytest.skip("Not supported on vGPU device")
32+
# TODO
33+
# if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
34+
# test_utils.skip_test("Not supported on chip before Blackwell")
3035

31-
# TODO
32-
# if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
33-
# test_utils.skip_test("Not supported on chip before Blackwell")
36+
try:
37+
platform_info = nvml.device_get_platform_info(device)
38+
except nvml.NotSupportedError:
39+
skip_reasons.add(f"Not supported returned, likely NVLink is disabled for {device}")
40+
continue
3441

35-
try:
36-
platform_info = nvml.device_get_platform_info(device)
37-
except nvml.NotSupportedError:
38-
pytest.skip("Not supported returned, likely NVLink is disabled.")
42+
assert isinstance(platform_info, nvml.PlatformInfo_v2)
3943

40-
assert isinstance(platform_info, nvml.PlatformInfo_v2)
44+
if skip_reasons:
45+
pytest.skip(" ; ".join(skip_reasons))

cuda_bindings/tests/nvml/test_nvlink.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,25 @@
55
from cuda.bindings import _nvml as nvml
66

77

8-
def test_nvlink_get_link_count(for_all_devices):
8+
def test_nvlink_get_link_count(all_devices):
99
"""
1010
Checks that the link count query for each device either succeeds or reports not supported.
1111
"""
12-
device = for_all_devices
12+
for device in all_devices:
13+
fields = nvml.FieldValue(1)
14+
fields[0].field_id = nvml.FI.DEV_NVLINK_LINK_COUNT
15+
value = nvml.device_get_field_values(device, fields)[0]
16+
assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, (
17+
f"Unexpected return {value.nvml_return} for link count field query"
18+
)
1319

14-
fields = nvml.FieldValue(1)
15-
fields[0].field_id = nvml.FI.DEV_NVLINK_LINK_COUNT
16-
value = nvml.device_get_field_values(device, fields)[0]
17-
assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, (
18-
f"Unexpected return {value.nvml_return} for link count field query"
19-
)
20+
# Use the alternative argument to device_get_field_values
21+
value = nvml.device_get_field_values(device, [nvml.FI.DEV_NVLINK_LINK_COUNT])[0]
22+
assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, (
23+
f"Unexpected return {value.nvml_return} for link count field query"
24+
)
2025

21-
# Use the alternative argument to device_get_field_values
22-
value = nvml.device_get_field_values(device, [nvml.FI.DEV_NVLINK_LINK_COUNT])[0]
23-
assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, (
24-
f"Unexpected return {value.nvml_return} for link count field query"
25-
)
26-
27-
# The feature_nvlink_supported detection is not robust, so we
28-
# can't be more specific about how many links we should find.
29-
if value.nvml_return == nvml.Return.SUCCESS:
30-
assert value.value.ui_val <= nvml.NVLINK_MAX_LINKS, f"Unexpected link count {value.value.ui_val}"
26+
# The feature_nvlink_supported detection is not robust, so we
27+
# can't be more specific about how many links we should find.
28+
if value.nvml_return == nvml.Return.SUCCESS:
29+
assert value.value.ui_val <= nvml.NVLINK_MAX_LINKS, f"Unexpected link count {value.value.ui_val}"

cuda_bindings/tests/nvml/test_page_retirement.py

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,42 +20,55 @@ def supports_page_retirement(device):
2020
return False
2121

2222

23-
def test_page_retirement_notsupported(for_all_devices):
23+
def test_page_retirement_notsupported(all_devices):
2424
"""
2525
Verifies that on platforms that don't support page retirement, APIs will return Not Supported
2626
"""
27-
device = for_all_devices
27+
skip_reasons = set()
2828

29-
if supports_page_retirement(device):
30-
pytest.skip("page_retirement not supported")
29+
for device in all_devices:
30+
if supports_page_retirement(device):
31+
skip_reasons.add(f"page_retirement is supported for {device}")
32+
continue
3133

32-
if not util.supports_ecc(device):
33-
pytest.skip("device doesn't support ECC")
34+
if not util.supports_ecc(device):
35+
skip_reasons.add(f"device doesn't support ECC for {device}")
36+
continue
3437

35-
with pytest.raises(nvml.NotSupportedError):
36-
for source in PAGE_RETIREMENT_PUBLIC_CAUSE_TYPES:
37-
nvml.device_get_retired_pages(device, source)
38+
with pytest.raises(nvml.NotSupportedError):
39+
for source in PAGE_RETIREMENT_PUBLIC_CAUSE_TYPES:
40+
nvml.device_get_retired_pages(device, source)
3841

39-
with pytest.raises(nvml.NotSupportedError):
40-
nvml.device_get_retired_pages_pending_status(device)
42+
with pytest.raises(nvml.NotSupportedError):
43+
nvml.device_get_retired_pages_pending_status(device)
4144

45+
if skip_reasons:
46+
pytest.skip(" ; ".join(skip_reasons))
4247

43-
def test_page_retirement_supported(for_all_devices):
48+
49+
def test_page_retirement_supported(all_devices):
4450
"""
4551
Verifies that on platforms that support page_retirement, APIs will return success
4652
"""
47-
device = for_all_devices
53+
skip_reasons = set()
4854

49-
if not supports_page_retirement(device):
50-
pytest.skip("page_retirement not supported")
55+
for device in all_devices:
56+
if not supports_page_retirement(device):
57+
skip_reasons.add(f"page_retirement not supported for {device}")
58+
continue
5159

52-
if not util.supports_ecc(device):
53-
pytest.skip("device doesn't support ECC")
60+
if not util.supports_ecc(device):
61+
skip_reasons.add(f"device doesn't support ECC for {device}")
62+
continue
5463

55-
try:
56-
for source in PAGE_RETIREMENT_PUBLIC_CAUSE_TYPES:
57-
nvml.device_get_retired_pages(device, source)
58-
except nvml.NotSupportedError:
59-
pytest.skip("Exception case: Page retirment is not supported in this GPU")
64+
try:
65+
for source in PAGE_RETIREMENT_PUBLIC_CAUSE_TYPES:
66+
nvml.device_get_retired_pages(device, source)
67+
except nvml.NotSupportedError:
68+
skip_reasons.add(f"Exception case: Page retirement is not supported in this GPU {device}")
69+
continue
70+
71+
nvml.device_get_retired_pages_pending_status(device)
6072

61-
nvml.device_get_retired_pages_pending_status(device)
73+
if skip_reasons:
74+
pytest.skip(" ; ".join(skip_reasons))

0 commit comments

Comments
 (0)