Skip to content

Commit 35a485d

Browse files
committed
target-feature: enable rust target features implied by target-cpu
Normally LLVM and rustc agree about what features are implied by target-cpu, but for NVPTX, LLVM considers sm_* and ptx* features to be exclusive, which makes sense for codegen purposes. But in Rust, we want to think of them as:
- sm_{sver} means that the target supports the hardware features of sver;
- ptx{pver} means the driver supports PTX ISA pver.
Intrinsics usually require a minimum sm_{sver} and ptx{pver}. Prior to this commit, -Ctarget-cpu=sm_70 would activate only sm_70 and ptx60 (the minimum PTX version that supports sm_70, which maximizes driver compatibility). With this commit, it also activates all the implied target features (sm_20, ..., sm_62; ptx32, ..., ptx50).
1 parent 6dbac3f commit 35a485d

File tree

3 files changed

+44
-8
lines changed

3 files changed

+44
-8
lines changed

compiler/rustc_codegen_llvm/src/llvm_util.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -333,15 +333,12 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
333333
///
334334
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
335335
pub(crate) fn target_config(sess: &Session) -> TargetConfig {
336-
// Add base features for the target.
337-
// We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
338-
// The reason is that if LLVM considers a feature implied but we do not, we don't want that to
339-
// show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of
340-
// the target CPU, that is still expanded to target features (with all their implied features)
341-
// by LLVM.
342336
let target_machine = create_informational_target_machine(sess, true);
343337

344338
let (unstable_target_features, target_features) = cfg_target_feature(sess, |feature| {
339+
// This closure determines whether the target CPU has the feature according to LLVM. We do
340+
// *not* consider the `-Ctarget-feature`s here, as that will be handled later in
341+
// `cfg_target_feature`.
345342
if let Some(feat) = to_llvm_features(sess, feature) {
346343
// All the LLVM features this expands to must be enabled.
347344
for llvm_feature in feat {

compiler/rustc_codegen_ssa/src/target_features.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,10 @@ fn parse_rust_feature_flag<'a>(
224224
/// 2nd component of the return value, respectively).
225225
///
226226
/// `target_base_has_feature` should check whether the given feature (a Rust feature name!) is
227-
/// enabled in the "base" target machine, i.e., without applying `-Ctarget-feature`.
227+
/// enabled in the "base" target machine, i.e., without applying `-Ctarget-feature`. Note that LLVM
228+
/// may consider features to be implied that we do not and vice-versa. We want `cfg` to be entirely
229+
/// consistent with Rust feature implications, and thus only consult LLVM to expand the target CPU
230+
/// to target features.
228231
///
229232
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled elsewhere.
230233
pub fn cfg_target_feature(
@@ -238,7 +241,15 @@ pub fn cfg_target_feature(
238241
.rust_target_features()
239242
.iter()
240243
.filter(|(feature, _, _)| target_base_has_feature(feature))
241-
.map(|(feature, _, _)| Symbol::intern(feature))
244+
.flat_map(|(base_feature, _, _)| {
245+
// Expand the direct base feature into all transitively-implied features. Note that we
246+
// cannot simply use the `implied` field of the tuple since that only contains
247+
// directly-implied features.
248+
//
249+
// Iteration order is irrelevant because we're collecting into an `UnordSet`.
250+
#[allow(rustc::potential_query_instability)]
251+
sess.target.implied_target_features(base_feature).into_iter().map(|f| Symbol::intern(f))
252+
})
242253
.collect();
243254

244255
// Add enabled and remove disabled features.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//@ assembly-output: ptx-linker
2+
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_80 -Z unstable-options -Clinker-flavor=llbc
3+
//@ only-nvptx64
4+
//@ build-pass
5+
#![no_std]
6+
#![allow(dead_code)]
7+
8+
#[panic_handler]
9+
pub fn panic(_info: &core::panic::PanicInfo) -> ! {
10+
loop {}
11+
}
12+
13+
// -Ctarget-cpu=sm_80 directly enables sm_80 and ptx70
14+
#[cfg(not(all(target_feature = "sm_80", target_feature = "ptx70")))]
15+
compile_error!("direct target features not enabled");
16+
17+
// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features.
18+
#[cfg(not(all(
19+
target_feature = "sm_60",
20+
target_feature = "sm_70",
21+
target_feature = "ptx50",
22+
target_feature = "ptx60",
23+
)))]
24+
compile_error!("implied target features not enabled");
25+
26+
// -Ctarget-cpu=sm_80 must not enable ptx* features newer than the minimum it requires (ptx70).
27+
#[cfg(target_feature = "ptx71")]
28+
compile_error!("sm_80 requires only ptx70, but ptx71 enabled");

0 commit comments

Comments
 (0)