-
Notifications
You must be signed in to change notification settings - Fork 193
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
122 lines (109 loc) · 3.12 KB
/
Copy pathCMakeLists.txt
File metadata and controls
122 lines (109 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# Sources of the `samples` executable.
# Every translation unit is listed explicitly (no globbing) and grouped by the
# directory it lives in; paths are relative to this listfile's directory.
add_executable(samples
    # sdpa/
    sdpa/fp16_fwd.cpp
    sdpa/fp16_bwd.cpp
    sdpa/fp16_cached.cpp
    sdpa/fp16_benchmark.cpp
    sdpa/fp16_fwd_with_flexible_graphs.cpp
    sdpa/fp16_bwd_with_flexible_graphs.cpp
    sdpa/fp16_fwd_with_custom_dropout.cpp
    sdpa/fp16_fwd_with_paged_caches.cpp
    sdpa/fp16_fwd_with_cu_seq_len.cpp
    sdpa/fp16_dynamic_shapes.cpp
    sdpa/fp16_fwd_paged_decode_and_prefill.cpp
    sdpa/fp16_fwd_with_cudagraphs.cpp
    sdpa/fp16_bwd_with_cudagraphs.cpp
    sdpa/fp8_fwd.cpp
    sdpa/mxfp8_fwd.cpp
    sdpa/mxfp8_bwd.cpp
    sdpa/fp8_fwd_current_scaling.cpp
    sdpa/fp8_bwd_with_current_scaling.cpp
    sdpa/fp8_bwd.cpp
    sdpa/fp8_fwd_bottom_right_causal_mask.cpp
    sdpa/fp8_bwd_bottom_right_causal_mask.cpp
    sdpa/fp16_fwd_with_sink_token.cpp
    sdpa/fp16_bwd_with_sink_token.cpp
    sdpa/fp16_fwd_with_max_and_sum_exp.cpp
    sdpa/fp16_fwd_with_block_mask.cpp
    sdpa/prefill_oss_engine.cpp

    # convolution/
    convolution/fprop.cpp
    convolution/fp8_fprop.cpp
    convolution/int8_fprop.cpp
    convolution/dgrads.cpp
    convolution/wgrads.cpp
    convolution/conv_dynamic_shape_benchmark.cpp

    # matmul/
    matmul/matmuls.cpp
    matmul/fp8_matmul.cpp
    matmul/int8_matmul.cpp
    matmul/mixed_matmul.cpp
    matmul/blackwell_nvfp4_mxfp8_block_scale_matmul.cpp
    matmul/general_block_scale_matmul.cpp
    matmul/complex_fp32_matmul.cpp

    # moe_grouped_matmul/
    moe_grouped_matmul/moe_grouped_matmul.cpp

    # norm/
    norm/batchnorm.cpp
    norm/layernorm.cpp
    norm/adaptive_layernorm.cpp
    norm/norm_zero_centered_gamma.cpp
    norm/rmsnorm.cpp
    norm/norm_block_scale.cpp
    norm/layernorm_bitmask_relu.cpp

    # misc/
    misc/serialization.cpp
    misc/autotuning.cpp
    misc/custom_plan.cpp
    misc/parallel_compilation.cpp
    misc/pointwise.cpp
    misc/resample.cpp
    misc/slice.cpp
    misc/sm_carveout.cpp
    misc/cudagraphs.cpp
    misc/deviceless_aot_compilation.cpp
    misc/compile_time_constant_example.cpp

    # membound/
    membound/transpose.cpp
    membound/reshape.cpp
    membound/slice.cpp
    membound/concat.cpp
    membound/membound_fusion.cpp
    membound/boolean_fusion.cpp

    # causal_conv1d/
    causal_conv1d/causal_conv1d.cpp
)
# Compiler diagnostics for `samples`.
# Both toolchain families enable a broad warning set and promote warnings to
# errors; the exemptions below are the only diagnostics that are switched off.
if(NOT MSVC)
    # GCC/Clang-style command line.
    target_compile_options(samples PRIVATE
        -Wall
        -Wextra
        -Werror                 # treat warnings as errors
        -Wno-unused-function    # do not warn about unused functions
    )
else()
    # MSVC-style command line.
    target_compile_options(samples PRIVATE
        /W4         # warning level 4
        /WX         # treat all warnings as errors
        /wd4100     # allow unused parameters
        /wd4458     # local hides class member (currently a problem for all inline setters)
        /wd4505     # unreferenced function with internal linkage has been removed
        /wd4101     # unreferenced local variable
        /wd4189     # local variable is initialized but not referenced
        /bigobj     # increase number of sections in .obj file
    )
endif()
# Link dependencies of `samples`. All are PRIVATE: `samples` is an executable,
# so nothing needs to propagate to consumers. The calls below append to the
# same link list in the order written.

# Threading and the Catch2 test framework.
target_link_libraries(samples PRIVATE
    Threads::Threads
    Catch2::Catch2WithMain
)

# Targets without a namespace; they are not defined in this part of the file.
target_link_libraries(samples PRIVATE
    cudnn_frontend
    _cudnn_frontend_pch
)

# cuDNN and CUDA toolkit libraries.
target_link_libraries(samples PRIVATE
    CUDNN::cudnn
    CUDA::cublasLt
    CUDA::cudart
    CUDA::nvrtc
)

# CUDNN::cudnn_ext is optional: link it only if the target already exists when
# this listfile is processed.
if(TARGET CUDNN::cudnn_ext)
    target_link_libraries(samples PRIVATE CUDNN::cudnn_ext)
endif()
# Emit the `samples` binary into <project build dir>/bin.
# The path is quoted so it always reaches set_target_properties() as a single
# argument: an unquoted expansion is split on ';', which would break the
# property/value pairing if the build directory path ever contained one.
set_target_properties(
    samples PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin"
)