From e018f40e6e327bd4d4a0a1ebd98f0ae7b57e1f1c Mon Sep 17 00:00:00 2001 From: Stephen Jia Date: Mon, 8 Dec 2025 18:24:12 -0800 Subject: [PATCH] QConv: Use buffer storage for weights Summary: Always allocate the prepacked quantized conv2d weight tensor with buffer storage, instead of defaulting to texture2d and falling back to buffer only when the packed size exceeds the max texture2d extent. Add [buffer, buffer] and [texture3d, buffer] shader combos and switch the load_weight_block guard to WEIGHT_BUFFER. Differential Revision: D88701731 --- .../vulkan/runtime/graph/ops/glsl/conv2d_q8_utils.glslh | 2 +- .../runtime/graph/ops/glsl/conv2d_q8ta_q8csw_q8to.yaml | 2 ++ .../vulkan/runtime/graph/ops/impl/QuantizedConvolution.cpp | 6 +----- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_q8_utils.glslh b/backends/vulkan/runtime/graph/ops/glsl/conv2d_q8_utils.glslh index 279f4f17f13..0db9f5f84a3 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_q8_utils.glslh +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_q8_utils.glslh @@ -90,7 +90,7 @@ ivec4 load_weight_block( const int Kw, const int Kh, const int OC4) { -#ifdef PACKED_INT8_WEIGHTS_BUFFER +#ifdef WEIGHT_BUFFER const int block_x = oc4 * Kw + kx; const int block_y = ky * IC4 + ic4; return t_packed_int8_weight[block_y * (Kw * OC4) + block_x]; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_q8ta_q8csw_q8to.yaml b/backends/vulkan/runtime/graph/ops/glsl/conv2d_q8ta_q8csw_q8to.yaml index 7d33434940c..1bb31c8127b 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_q8ta_q8csw_q8to.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_q8ta_q8csw_q8to.yaml @@ -13,7 +13,9 @@ conv2d_q8ta_q8csw_q8to: combination: parameter_names: [IO_STORAGE, WEIGHT_STORAGE] combos: + - parameter_values: [buffer, buffer] - parameter_values: [buffer, texture2d] + - parameter_values: [texture3d, buffer] - parameter_values: [texture3d, texture2d] DTYPE: - VALUE: float diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedConvolution.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedConvolution.cpp index d7d5ad6db1e..844993c1f74 100644 --- a/backends/vulkan/runtime/graph/ops/impl/QuantizedConvolution.cpp +++ 
b/backends/vulkan/runtime/graph/ops/impl/QuantizedConvolution.cpp @@ -442,11 +442,7 @@ ValueRef prepack_quantized_conv2d_weight( std::vector packed_weight_sizes{output_height, output_width}; - utils::StorageType storage_type = utils::kTexture2D; - uint32_t max_extent = graph.context()->adapter_ptr()->max_texture2d_dim(); - if (output_width > max_extent * 4 || output_height > max_extent) { - storage_type = utils::kBuffer; - } + utils::StorageType storage_type = utils::kBuffer; ValueRef packed_weight = graph.add_tensor( packed_weight_sizes,