Skip to content

Commit 462571b

Browse files
committed
add nvptx_target_feature
Add target features for sm_* and ptx*, both of which form a partial order, but cannot be combined into a single partial order. These mirror the LLVM target features, but we do not provide LLVM target processors (which imply both an sm_* and a ptx* feature). Add some documentation for the nvptx target.
1 parent e5a2a6a commit 462571b

File tree

9 files changed

+172
-1
lines changed

9 files changed

+172
-1
lines changed

compiler/rustc_codegen_llvm/src/llvm_util.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
274274
// Filter out features that are not supported by the current LLVM version
275275
("aarch64", "fpmr") => None, // only existed in 18
276276
("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
277+
// NVPTX targets added in LLVM 20
278+
("nvptx64", "sm_100") if get_version().0 < 20 => None,
279+
("nvptx64", "sm_100a") if get_version().0 < 20 => None,
280+
("nvptx64", "sm_101") if get_version().0 < 20 => None,
281+
("nvptx64", "sm_101a") if get_version().0 < 20 => None,
282+
("nvptx64", "sm_120") if get_version().0 < 20 => None,
283+
("nvptx64", "sm_120a") if get_version().0 < 20 => None,
284+
("nvptx64", "ptx86") if get_version().0 < 20 => None,
285+
("nvptx64", "ptx87") if get_version().0 < 20 => None,
277286
// Filter out features that are not supported by the current LLVM version
278287
("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq")
279288
if get_version().0 < 20 =>

compiler/rustc_feature/src/unstable.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ declare_features! (
327327
(unstable, m68k_target_feature, "1.85.0", Some(134328)),
328328
(unstable, mips_target_feature, "1.27.0", Some(44839)),
329329
(unstable, movrs_target_feature, "1.88.0", Some(137976)),
330+
(unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
330331
(unstable, powerpc_target_feature, "1.27.0", Some(44839)),
331332
(unstable, prfchw_target_feature, "1.78.0", Some(44839)),
332333
(unstable, riscv_target_feature, "1.45.0", Some(44839)),

compiler/rustc_span/src/symbol.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,6 +1482,7 @@ symbols! {
14821482
not,
14831483
notable_trait,
14841484
note,
1485+
nvptx_target_feature,
14851486
object_safe_for_dispatch,
14861487
of,
14871488
off,

compiler/rustc_target/src/target_features.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,71 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
512512
// tidy-alphabetical-end
513513
];
514514

515+
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
516+
// tidy-alphabetical-start
517+
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
518+
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
519+
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
520+
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
521+
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
522+
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
523+
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
524+
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
525+
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
526+
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
527+
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
528+
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
529+
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
530+
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
531+
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
532+
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
533+
("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
534+
("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
535+
("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]),
536+
("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]),
537+
("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
538+
// tidy-alphabetical-end
539+
// tidy-alphabetical-start
540+
("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
541+
("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
542+
("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
543+
("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
544+
("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
545+
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
546+
// tidy-alphabetical-end
547+
// tidy-alphabetical-start
548+
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
549+
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
550+
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
551+
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
552+
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
553+
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
554+
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
555+
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
556+
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
557+
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
558+
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
559+
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
560+
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
561+
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
562+
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
563+
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
564+
("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
565+
("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
566+
("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
567+
("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
568+
("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
569+
("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
570+
("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
571+
("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
572+
("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
573+
("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
574+
("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
575+
("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
576+
("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
577+
// tidy-alphabetical-end
578+
];
579+
515580
static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
516581
// tidy-alphabetical-start
517582
("a", Stable, &["zaamo", "zalrsc"]),
@@ -770,6 +835,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
770835
.chain(HEXAGON_FEATURES.iter())
771836
.chain(POWERPC_FEATURES.iter())
772837
.chain(MIPS_FEATURES.iter())
838+
.chain(NVPTX_FEATURES.iter())
773839
.chain(RISCV_FEATURES.iter())
774840
.chain(WASM_FEATURES.iter())
775841
.chain(BPF_FEATURES.iter())
@@ -835,6 +901,7 @@ impl Target {
835901
"x86" | "x86_64" => X86_FEATURES,
836902
"hexagon" => HEXAGON_FEATURES,
837903
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
904+
"nvptx64" => NVPTX_FEATURES,
838905
"powerpc" | "powerpc64" => POWERPC_FEATURES,
839906
"riscv32" | "riscv64" => RISCV_FEATURES,
840907
"wasm32" | "wasm64" => WASM_FEATURES,
@@ -861,6 +928,7 @@ impl Target {
861928
"sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
862929
"hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
863930
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
931+
"nvptx64" => &[], // no vector ABI
864932
"bpf" | "m68k" => &[], // no vector ABI
865933
"csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
866934
// FIXME: for some tier3 targets, we are overly cautious and always give warnings

library/core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@
189189
// Target features:
190190
// tidy-alphabetical-start
191191
#![cfg_attr(bootstrap, feature(avx512_target_feature))]
192+
#![cfg_attr(not(bootstrap), feature(nvptx_target_feature))]
192193
#![feature(aarch64_unstable_target_feature)]
193194
#![feature(arm_target_feature)]
194195
#![feature(hexagon_target_feature)]

src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,40 @@ platform.
1010
[@RDambrosio016](https://github.com/RDambrosio016)
1111
[@kjetilkjeka](https://github.com/kjetilkjeka)
1212

13+
## Requirements
14+
15+
This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
16+
The necessary toolchain and components for this workflow can be installed with:
17+
18+
- `rustup toolchain add nightly`
19+
- `rustup component add llvm-tools --toolchain nightly`
20+
- `rustup component add llvm-bitcode-linker --toolchain nightly`
21+
22+
There are two options for using the core library:
23+
24+
- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
25+
- `rustup target add nvptx64-nvidia-cuda --toolchain nightly`
26+
27+
### Target and features
28+
29+
It is necessary to specify the target processor, such as `-C target-cpu=sm_89`. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
30+
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target processor (the default `ptx78` requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
31+
32+
Although `ptx*` is represented as a target feature, it is a compile-time property: it is not possible to build a crate that uses instructions not present in the PTX version specified at compile time (either via `target-cpu` or `target-feature`).
33+
For example, consider an unaligned barrier `barrier.sync`, which requires both `sm_70` and `ptx60`.
34+
If one wants to support building for older devices (e.g., with `-C target-cpu=sm_62`) while ensuring that this unaligned barrier is unreachable at run time on such devices, the relevant function could use these attributes:
35+
```rust,ignore (attributes only, not a complete item)
36+
#[cfg(target_feature = "ptx60")]
37+
#[target_feature(enable = "sm_70")]
38+
```
39+
40+
## Building Rust kernels
41+
42+
A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.
43+
44+
```console
45+
$ cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Ctarget-cpu=sm_89 -Zunstable-options
46+
```
1347
<!-- FIXME: fill this out
1448
1549
## Requirements

tests/ui/check-cfg/target_feature.stderr

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,35 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
188188
`power9-altivec`
189189
`power9-vector`
190190
`prfchw`
191+
`ptx32`
192+
`ptx40`
193+
`ptx41`
194+
`ptx42`
195+
`ptx43`
196+
`ptx50`
197+
`ptx60`
198+
`ptx61`
199+
`ptx62`
200+
`ptx63`
201+
`ptx64`
202+
`ptx65`
203+
`ptx70`
204+
`ptx71`
205+
`ptx72`
206+
`ptx73`
207+
`ptx74`
208+
`ptx75`
209+
`ptx76`
210+
`ptx77`
211+
`ptx78`
212+
`ptx80`
213+
`ptx81`
214+
`ptx82`
215+
`ptx83`
216+
`ptx84`
217+
`ptx85`
218+
`ptx86`
219+
`ptx87`
191220
`quadword-atomics`
192221
`rand`
193222
`ras`
@@ -213,6 +242,33 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
213242
`simd128`
214243
`sm3`
215244
`sm4`
245+
`sm_100`
246+
`sm_100a`
247+
`sm_101`
248+
`sm_101a`
249+
`sm_120`
250+
`sm_120a`
251+
`sm_20`
252+
`sm_21`
253+
`sm_30`
254+
`sm_32`
255+
`sm_35`
256+
`sm_37`
257+
`sm_50`
258+
`sm_52`
259+
`sm_53`
260+
`sm_60`
261+
`sm_61`
262+
`sm_62`
263+
`sm_70`
264+
`sm_72`
265+
`sm_75`
266+
`sm_80`
267+
`sm_86`
268+
`sm_87`
269+
`sm_89`
270+
`sm_90`
271+
`sm_90a`
216272
`sme`
217273
`sme-b16b16`
218274
`sme-f16f16`

tests/ui/target-feature/gate.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// gate-test-arm_target_feature
77
// gate-test-hexagon_target_feature
88
// gate-test-mips_target_feature
9+
// gate-test-nvptx_target_feature
910
// gate-test-wasm_target_feature
1011
// gate-test-adx_target_feature
1112
// gate-test-cmpxchg16b_target_feature

tests/ui/target-feature/gate.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
error[E0658]: the target feature `x87` is currently unstable
2-
--> $DIR/gate.rs:29:18
2+
--> $DIR/gate.rs:30:18
33
|
44
LL | #[target_feature(enable = "x87")]
55
| ^^^^^^^^^^^^^^

0 commit comments

Comments
 (0)