Commit f532972

fix: avoid precision issues on vulkan backend (#980)
1 parent d5b05f7 commit f532972

2 files changed: 12 additions, 4 deletions

common.hpp

Lines changed: 6 additions & 2 deletions
@@ -242,14 +242,18 @@ class FeedForward : public GGMLBlock {
         }
 
         // net_1 is nn.Dropout(), skip for inference
-        float scale = 1.f;
+        bool force_prec_f32 = false;
+        float scale         = 1.f;
         if (precision_fix) {
             scale = 1.f / 128.f;
+#ifdef SD_USE_VULKAN
+            force_prec_f32 = true;
+#endif
         }
         // The purpose of the scale here is to prevent NaN issues in certain situations.
         // For example, when using Vulkan without enabling force_prec_f32,
         // or when using CUDA but the weights are k-quants.
-        blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, false, scale));
+        blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, force_prec_f32, scale));
     }
 
     struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {

qwen_image.hpp

Lines changed: 6 additions & 2 deletions
@@ -94,10 +94,14 @@ namespace Qwen {
             blocks["norm_added_q"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
             blocks["norm_added_k"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
 
-            float scale = 1.f / 32.f;
+            float scale         = 1.f / 32.f;
+            bool force_prec_f32 = false;
+#ifdef SD_USE_VULKAN
+            force_prec_f32 = true;
+#endif
             // The purpose of the scale here is to prevent NaN issues in certain situations.
             // For example when using CUDA but the weights are k-quants (not all prompts).
-            blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, false, scale));
+            blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, force_prec_f32, scale));
             // to_out.1 is nn.Dropout
 
             blocks["to_add_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_context_dim, out_bias, false, false, scale));
