Skip to content

Commit 6dbac3f

Browse files
committed
add nvptx_target_feature
Add target features for sm_* and ptx*. Each family forms its own partial order, but the two cannot be combined into a single partial order. These mirror the LLVM target features, but we do not provide LLVM target processors (which imply both an sm_* and a ptx* feature). Add some documentation for the nvptx target.
1 parent d4e1159 commit 6dbac3f

File tree

9 files changed

+178
-2
lines changed

9 files changed

+178
-2
lines changed

compiler/rustc_codegen_llvm/src/llvm_util.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
262262
// Filter out features that are not supported by the current LLVM version
263263
("aarch64", "fpmr") => None, // only existed in 18
264264
("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
265+
// NVPTX `sm_*` / `ptx*` target features that were only added in LLVM 20
266+
("nvptx64", "sm_100") if get_version().0 < 20 => None,
267+
("nvptx64", "sm_100a") if get_version().0 < 20 => None,
268+
("nvptx64", "sm_101") if get_version().0 < 20 => None,
269+
("nvptx64", "sm_101a") if get_version().0 < 20 => None,
270+
("nvptx64", "sm_120") if get_version().0 < 20 => None,
271+
("nvptx64", "sm_120a") if get_version().0 < 20 => None,
272+
("nvptx64", "ptx86") if get_version().0 < 20 => None,
273+
("nvptx64", "ptx87") if get_version().0 < 20 => None,
265274
// Filter out features that are not supported by the current LLVM version
266275
("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq")
267276
if get_version().0 < 20 =>

compiler/rustc_feature/src/unstable.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ declare_features! (
329329
(unstable, m68k_target_feature, "1.85.0", Some(134328)),
330330
(unstable, mips_target_feature, "1.27.0", Some(44839)),
331331
(unstable, movrs_target_feature, "1.88.0", Some(137976)),
332+
(unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
332333
(unstable, powerpc_target_feature, "1.27.0", Some(44839)),
333334
(unstable, prfchw_target_feature, "1.78.0", Some(44839)),
334335
(unstable, riscv_target_feature, "1.45.0", Some(44839)),

compiler/rustc_span/src/symbol.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,6 +1509,7 @@ symbols! {
15091509
not,
15101510
notable_trait,
15111511
note,
1512+
nvptx_target_feature,
15121513
object_safe_for_dispatch,
15131514
of,
15141515
off,

compiler/rustc_target/src/target_features.rs

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,71 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
517517
// tidy-alphabetical-end
518518
];
519519

520+
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
521+
// tidy-alphabetical-start
522+
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
523+
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
524+
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
525+
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
526+
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
527+
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
528+
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
529+
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
530+
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
531+
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
532+
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
533+
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
534+
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
535+
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
536+
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
537+
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
538+
("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
539+
("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
540+
("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]),
541+
("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]),
542+
("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
543+
// tidy-alphabetical-end
544+
// tidy-alphabetical-start
545+
("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
546+
("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
547+
("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
548+
("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
549+
("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
550+
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
551+
// tidy-alphabetical-end
552+
// tidy-alphabetical-start
553+
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
554+
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
555+
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
556+
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
557+
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
558+
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
559+
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
560+
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
561+
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
562+
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
563+
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
564+
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
565+
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
566+
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
567+
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
568+
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
569+
("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
570+
("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
571+
("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
572+
("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
573+
("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
574+
("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
575+
("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
576+
("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
577+
("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
578+
("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
579+
("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
580+
("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
581+
("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
582+
// tidy-alphabetical-end
583+
];
584+
520585
static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
521586
// tidy-alphabetical-start
522587
("a", Stable, &["zaamo", "zalrsc"]),
@@ -782,6 +847,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
782847
.chain(HEXAGON_FEATURES.iter())
783848
.chain(POWERPC_FEATURES.iter())
784849
.chain(MIPS_FEATURES.iter())
850+
.chain(NVPTX_FEATURES.iter())
785851
.chain(RISCV_FEATURES.iter())
786852
.chain(WASM_FEATURES.iter())
787853
.chain(BPF_FEATURES.iter())
@@ -847,6 +913,7 @@ impl Target {
847913
"x86" | "x86_64" => X86_FEATURES,
848914
"hexagon" => HEXAGON_FEATURES,
849915
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
916+
"nvptx64" => NVPTX_FEATURES,
850917
"powerpc" | "powerpc64" => POWERPC_FEATURES,
851918
"riscv32" | "riscv64" => RISCV_FEATURES,
852919
"wasm32" | "wasm64" => WASM_FEATURES,
@@ -873,7 +940,7 @@ impl Target {
873940
"sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
874941
"hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
875942
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
876-
"bpf" | "m68k" => &[], // no vector ABI
943+
"nvptx64" | "bpf" | "m68k" => &[], // no vector ABI
877944
"csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
878945
// FIXME: for some tier3 targets, we are overly cautious and always give warnings
879946
// when passing args in vector registers.

library/core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@
192192
#![feature(hexagon_target_feature)]
193193
#![feature(loongarch_target_feature)]
194194
#![feature(mips_target_feature)]
195+
#![feature(nvptx_target_feature)]
195196
#![feature(powerpc_target_feature)]
196197
#![feature(riscv_target_feature)]
197198
#![feature(rtm_target_feature)]

src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,46 @@ platform.
1010
[@RDambrosio016](https://github.com/RDambrosio016)
1111
[@kjetilkjeka](https://github.com/kjetilkjeka)
1212

13+
## Requirements
14+
15+
This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
16+
The necessary components for this workflow are:
17+
18+
- `rustup toolchain add nightly`
19+
- `rustup component add llvm-tools --toolchain nightly`
20+
- `rustup component add llvm-bitcode-linker --toolchain nightly`
21+
22+
There are two options for using the core library:
23+
24+
- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
25+
- `rustup target add nvptx64-nvidia-cuda --toolchain nightly`
26+
27+
### Target and features
28+
29+
It is generally necessary to specify the target processor, such as `-C target-cpu=sm_89`, because the default is very old. Selecting `sm_89` implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
30+
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
31+
Later PTX versions may allow more efficient code generation.
32+
33+
Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via `-Ctarget-cpu` and optionally `-Ctarget-feature`.
34+
While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, enabling these features per function is not supported, may not behave as intended, and may become an error in the future.
35+
36+
## Building Rust kernels
37+
38+
A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.
39+
40+
```console
41+
$ RUSTFLAGS='-Ctarget-cpu=sm_89' cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Zunstable-options
42+
```
43+
44+
Intrinsics in `core::arch::nvptx` may use `#[cfg(target_feature = "...")]`, so it is necessary to use `-Zbuild-std=core` with appropriate `RUSTFLAGS`. The following components are needed for this workflow:
45+
46+
```console
47+
$ rustup component add rust-src --toolchain nightly
48+
$ rustup component add llvm-tools --toolchain nightly
49+
$ rustup component add llvm-bitcode-linker --toolchain nightly
50+
```
51+
52+
1353
<!-- FIXME: fill this out
1454
1555
## Requirements

tests/ui/check-cfg/target_feature.stderr

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,35 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
198198
`power9-altivec`
199199
`power9-vector`
200200
`prfchw`
201+
`ptx32`
202+
`ptx40`
203+
`ptx41`
204+
`ptx42`
205+
`ptx43`
206+
`ptx50`
207+
`ptx60`
208+
`ptx61`
209+
`ptx62`
210+
`ptx63`
211+
`ptx64`
212+
`ptx65`
213+
`ptx70`
214+
`ptx71`
215+
`ptx72`
216+
`ptx73`
217+
`ptx74`
218+
`ptx75`
219+
`ptx76`
220+
`ptx77`
221+
`ptx78`
222+
`ptx80`
223+
`ptx81`
224+
`ptx82`
225+
`ptx83`
226+
`ptx84`
227+
`ptx85`
228+
`ptx86`
229+
`ptx87`
201230
`quadword-atomics`
202231
`rand`
203232
`ras`
@@ -222,6 +251,33 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
222251
`simd128`
223252
`sm3`
224253
`sm4`
254+
`sm_100`
255+
`sm_100a`
256+
`sm_101`
257+
`sm_101a`
258+
`sm_120`
259+
`sm_120a`
260+
`sm_20`
261+
`sm_21`
262+
`sm_30`
263+
`sm_32`
264+
`sm_35`
265+
`sm_37`
266+
`sm_50`
267+
`sm_52`
268+
`sm_53`
269+
`sm_60`
270+
`sm_61`
271+
`sm_62`
272+
`sm_70`
273+
`sm_72`
274+
`sm_75`
275+
`sm_80`
276+
`sm_86`
277+
`sm_87`
278+
`sm_89`
279+
`sm_90`
280+
`sm_90a`
225281
`sme`
226282
`sme-b16b16`
227283
`sme-f16f16`

tests/ui/target-feature/gate.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// gate-test-arm_target_feature
77
// gate-test-hexagon_target_feature
88
// gate-test-mips_target_feature
9+
// gate-test-nvptx_target_feature
910
// gate-test-wasm_target_feature
1011
// gate-test-adx_target_feature
1112
// gate-test-cmpxchg16b_target_feature

tests/ui/target-feature/gate.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
error[E0658]: the target feature `x87` is currently unstable
2-
--> $DIR/gate.rs:29:18
2+
--> $DIR/gate.rs:30:18
33
|
44
LL | #[target_feature(enable = "x87")]
55
| ^^^^^^^^^^^^^^

0 commit comments

Comments
 (0)