Skip to content

add nvptx_target_feature #138689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Original file line number Diff line number Diff line change
@@ -274,6 +274,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
// Filter out features that are not supported by the current LLVM version
("aarch64", "fpmr") => None, // only existed in 18
("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
// NVPTX targets added in LLVM 20
("nvptx64", "sm_100") if get_version().0 < 20 => None,
("nvptx64", "sm_100a") if get_version().0 < 20 => None,
("nvptx64", "sm_101") if get_version().0 < 20 => None,
("nvptx64", "sm_101a") if get_version().0 < 20 => None,
("nvptx64", "sm_120") if get_version().0 < 20 => None,
("nvptx64", "sm_120a") if get_version().0 < 20 => None,
("nvptx64", "ptx86") if get_version().0 < 20 => None,
("nvptx64", "ptx87") if get_version().0 < 20 => None,
// Filter out features that are not supported by the current LLVM version
("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq")
if get_version().0 < 20 =>
@@ -337,11 +346,12 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
let target_machine = create_informational_target_machine(sess, true);
// Compute which of the known target features are enabled in the 'base' target machine. We only
// consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
let mut cpu_implied_features: Vec<(bool, Symbol)> = Vec::new();
let mut features: FxHashSet<Symbol> = sess
.target
.rust_target_features()
.iter()
.filter(|(feature, _, _)| {
.filter(|(feature, _, implied)| {
// skip checking special features, as LLVM may not understand them
if RUSTC_SPECIAL_FEATURES.contains(feature) {
return true;
@@ -356,6 +366,7 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
return false;
}
}
cpu_implied_features.extend(implied.iter().map(|f| (true, Symbol::intern(f))));
true
} else {
false
@@ -364,14 +375,15 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
.map(|(feature, _, _)| Symbol::intern(feature))
.collect();

// Add enabled and remove disabled features.
for (enabled, feature) in
// Parse -Ctarget-feature=+feature1,-feature2
let cg_target_features =
sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
Some('+') => Some((true, Symbol::intern(&s[1..]))),
Some('-') => Some((false, Symbol::intern(&s[1..]))),
_ => None,
})
{
});
// Add features implied by -Ctarget-cpu followed by enabling/removing those specified by -Ctarget-feature
for (enabled, feature) in cpu_implied_features.into_iter().chain(cg_target_features) {
if enabled {
// Also add all transitively implied features.

1 change: 1 addition & 0 deletions compiler/rustc_feature/src/unstable.rs
Original file line number Diff line number Diff line change
@@ -327,6 +327,7 @@ declare_features! (
(unstable, m68k_target_feature, "1.85.0", Some(134328)),
(unstable, mips_target_feature, "1.27.0", Some(44839)),
(unstable, movrs_target_feature, "1.88.0", Some(137976)),
(unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
(unstable, powerpc_target_feature, "1.27.0", Some(44839)),
(unstable, prfchw_target_feature, "1.78.0", Some(44839)),
(unstable, riscv_target_feature, "1.45.0", Some(44839)),
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
@@ -1487,6 +1487,7 @@ symbols! {
not,
notable_trait,
note,
nvptx_target_feature,
object_safe_for_dispatch,
of,
off,
69 changes: 68 additions & 1 deletion compiler/rustc_target/src/target_features.rs
Original file line number Diff line number Diff line change
@@ -512,6 +512,71 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-end
];

// Target features recognized for NVPTX targets, as `(name, stability, implied features)`.
//
// Every entry is unstable and gated behind `nvptx_target_feature`. The table holds two
// independent families: `sm_*` and `ptx*`. Within a family, each entry lists its immediate
// predecessor as an implied feature, so the implications form a chain back to the oldest
// entry (`sm_20` / `ptx32`).
//
// The table is split into several `tidy-alphabetical` sections so that each section is
// sorted on its own while the table as a whole stays in ascending numeric order
// (three-digit `sm_1xx` names would otherwise sort before `sm_20`).
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// `sm_20` through `sm_90a`. `sm_90a` implies `sm_90`; nothing in this table implies `sm_90a`.
// tidy-alphabetical-start
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]),
("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]),
("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
// tidy-alphabetical-end
// Three-digit `sm_*` entries. The base entries continue the chain
// (`sm_90` <- `sm_100` <- `sm_101` <- `sm_120`); each `a`-suffixed entry implies only its own
// base entry and is not implied by any later entry.
// tidy-alphabetical-start
("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
// tidy-alphabetical-end
// `ptx*` entries: a single linear chain in which every entry implies the one before it.
// tidy-alphabetical-start
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
// tidy-alphabetical-end
];

static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-start
("a", Stable, &["zaamo", "zalrsc"]),
@@ -770,6 +835,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
.chain(HEXAGON_FEATURES.iter())
.chain(POWERPC_FEATURES.iter())
.chain(MIPS_FEATURES.iter())
.chain(NVPTX_FEATURES.iter())
.chain(RISCV_FEATURES.iter())
.chain(WASM_FEATURES.iter())
.chain(BPF_FEATURES.iter())
@@ -835,6 +901,7 @@ impl Target {
"x86" | "x86_64" => X86_FEATURES,
"hexagon" => HEXAGON_FEATURES,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
"nvptx64" => NVPTX_FEATURES,
"powerpc" | "powerpc64" => POWERPC_FEATURES,
"riscv32" | "riscv64" => RISCV_FEATURES,
"wasm32" | "wasm64" => WASM_FEATURES,
@@ -861,7 +928,7 @@ impl Target {
"sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
"hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
"bpf" | "m68k" => &[], // no vector ABI
"nvptx64" | "bpf" | "m68k" => &[], // no vector ABI
"csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
// FIXME: for some tier3 targets, we are overly cautious and always give warnings
// when passing args in vector registers.
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -193,6 +193,7 @@
#![feature(keylocker_x86)]
#![feature(loongarch_target_feature)]
#![feature(mips_target_feature)]
#![feature(nvptx_target_feature)]
#![feature(powerpc_target_feature)]
#![feature(riscv_target_feature)]
#![feature(rtm_target_feature)]
40 changes: 40 additions & 0 deletions src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md
Original file line number Diff line number Diff line change
@@ -10,6 +10,46 @@ platform.
[@RDambrosio016](https://github.com/RDambrosio016)
[@kjetilkjeka](https://github.com/kjetilkjeka)

## Requirements

This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
The necessary components for this workflow are:

- `rustup toolchain add nightly`
- `rustup component add llvm-tools --toolchain nightly`
- `rustup component add llvm-bitcode-linker --toolchain nightly`

There are two options for using the core library:

- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
- `rustup target add nvptx64-nvidia-cuda --toolchain nightly`

### Target and features

It is generally necessary to specify the target, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
Later PTX versions may allow more efficient code generation.

Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via `-Ctarget-cpu` and optionally `-Ctarget-feature`.
While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future.

## Building Rust kernels

A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.

```console
$ RUSTFLAGS='-Ctarget-cpu=sm_89' cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Zunstable-options
```

Intrinsics in `core::arch::nvptx` may use `#[cfg(target_feature = "...")]`, thus it's necessary to use `-Zbuild-std=core` with appropriate `RUSTFLAGS`. The following components are needed for this workflow:

```console
$ rustup component add rust-src --toolchain nightly
$ rustup component add llvm-tools --toolchain nightly
$ rustup component add llvm-bitcode-linker --toolchain nightly
```


<!-- FIXME: fill this out

## Requirements
56 changes: 56 additions & 0 deletions tests/ui/check-cfg/target_feature.stderr
Original file line number Diff line number Diff line change
@@ -188,6 +188,35 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`power9-altivec`
`power9-vector`
`prfchw`
`ptx32`
`ptx40`
`ptx41`
`ptx42`
`ptx43`
`ptx50`
`ptx60`
`ptx61`
`ptx62`
`ptx63`
`ptx64`
`ptx65`
`ptx70`
`ptx71`
`ptx72`
`ptx73`
`ptx74`
`ptx75`
`ptx76`
`ptx77`
`ptx78`
`ptx80`
`ptx81`
`ptx82`
`ptx83`
`ptx84`
`ptx85`
`ptx86`
`ptx87`
`quadword-atomics`
`rand`
`ras`
@@ -213,6 +242,33 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`simd128`
`sm3`
`sm4`
`sm_100`
`sm_100a`
`sm_101`
`sm_101a`
`sm_120`
`sm_120a`
`sm_20`
`sm_21`
`sm_30`
`sm_32`
`sm_35`
`sm_37`
`sm_50`
`sm_52`
`sm_53`
`sm_60`
`sm_61`
`sm_62`
`sm_70`
`sm_72`
`sm_75`
`sm_80`
`sm_86`
`sm_87`
`sm_89`
`sm_90`
`sm_90a`
`sme`
`sme-b16b16`
`sme-f16f16`
1 change: 1 addition & 0 deletions tests/ui/target-feature/gate.rs
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
// gate-test-arm_target_feature
// gate-test-hexagon_target_feature
// gate-test-mips_target_feature
// gate-test-nvptx_target_feature
// gate-test-wasm_target_feature
// gate-test-adx_target_feature
// gate-test-cmpxchg16b_target_feature
2 changes: 1 addition & 1 deletion tests/ui/target-feature/gate.stderr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
error[E0658]: the target feature `x87` is currently unstable
--> $DIR/gate.rs:29:18
--> $DIR/gate.rs:30:18
|
LL | #[target_feature(enable = "x87")]
| ^^^^^^^^^^^^^^
28 changes: 28 additions & 0 deletions tests/ui/target-feature/implied-features-nvptx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//@ assembly-output: ptx-linker
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_80 -Z unstable-options -Clinker-flavor=llbc
//@ only-nvptx64
//@ build-pass
#![no_std]
#![allow(dead_code)]

#[panic_handler]
pub fn panic(_info: &core::panic::PanicInfo) -> ! {
loop {}
}

// -Ctarget-cpu=sm_80 directly enables sm_80 and ptx70
#[cfg(not(all(target_feature = "sm_80", target_feature = "ptx70")))]
compile_error!("direct target features not enabled");

// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features.
#[cfg(not(all(
target_feature = "sm_60",
target_feature = "sm_70",
target_feature = "ptx50",
target_feature = "ptx60",
)))]
compile_error!("implied target features not enabled");

// -Ctarget-cpu=sm_80 must not enable any PTX version newer than the one it requires (ptx70).
#[cfg(target_feature = "ptx71")]
compile_error!("sm_80 requires only ptx70, but ptx71 enabled");