4 changes: 2 additions & 2 deletions backends/vulkan/op_registry.py
@@ -636,7 +636,7 @@ def register_quantized_binary_op():
def register_quantize_for_conv2d_op():
return OpFeatures(
inputs_storage=[
-utils.CHANNELS_PACKED_TEXTURE,
+utils.CHANNELS_PACKED_TEXTURE_OR_CONTIGUOUS_BUFFER,
],
outputs_storage=[
utils.PACKED_INT8_4W4C_BUFFER,
@@ -656,7 +656,7 @@ def register_dequantize_for_conv2d_op():
utils.PACKED_INT8_4W4C_BUFFER,
],
outputs_storage=[
-utils.CHANNELS_PACKED_TEXTURE,
+utils.CHANNELS_PACKED_TEXTURE_OR_CONTIGUOUS_BUFFER,
],
supports_resize=False,
)
@@ -14,7 +14,21 @@
#include "linear_fp_input_tile.glslh"

VEC4_T load_fp_input_texel(const Conv2dTensorIndex tidx) {
+#ifdef INPUT_BUFFER
+VEC4_T texel = VEC4_T(0);
+const int c_idx = mul_4(tidx.data.z);
+const int c_stride = input_sizes.y * input_sizes.x;
+
+const int base_buf_i = c_idx * c_stride + tidx.data.y * input_sizes.x + tidx.data.x;
+const int limit = min(input_sizes.z - c_idx, 4);
+
+for (int i = 0; i < limit; i++) {
+texel[i] = t_fp_input[base_buf_i + i * c_stride];
+}
+return texel;
+#else
return texelFetch(t_fp_input, tidx.data, 0);
+#endif
}

void load_fp_input_tile(
@@ -23,7 +37,9 @@ void load_fp_input_tile(
#if TILE_M == 4 && TILE_K4 == 1
Conv2dTensorIndex load_tidx = block_idx_to_tensor_idx(block_idx);
[[unroll]] for (int w = 0; w < TILE_M; w++) {
-tile.data[w][0] = load_fp_input_texel(load_tidx);
+if (load_tidx.data.x < input_sizes.x) {
+tile.data[w][0] = load_fp_input_texel(load_tidx);
+}
load_tidx.data.x++;
}
#else
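The new INPUT_BUFFER path above gathers a 4-channel texel from a contiguous (width-packed) NCHW buffer: it steps by the per-channel stride H * W and clamps against the channel count so a trailing partial group of 4 does not read out of bounds, while the added check in load_fp_input_tile skips block columns whose x overhangs the tensor width. A minimal Python sketch of the same indexing, assuming input_sizes.x = W, input_sizes.y = H, input_sizes.z = C as the shader arithmetic implies; the helper and its arguments are illustrative only, not part of the codebase:

# Reference sketch of load_fp_input_texel's buffer branch (illustrative).
# buf is a flat NCHW buffer with N == 1, indexed as buf[c * H * W + y * W + x].
def load_fp_input_texel_buffer(buf, W, H, C, x, y, z4):
    c_idx = 4 * z4            # first channel covered by this texel
    c_stride = H * W          # distance between consecutive channels
    base = c_idx * c_stride + y * W + x
    texel = [0.0, 0.0, 0.0, 0.0]
    for i in range(min(C - c_idx, 4)):   # clamp for a trailing partial group
        texel[i] = buf[base + i * c_stride]
    return texel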
@@ -31,7 +31,7 @@ layout(std430) buffer;
#include "conv2d_common.glslh"

${layout_declare_tensor(B, "w", "t_packed_int8_input", "int", OUTPUT_STORAGE, is_scalar_array=False)}
${layout_declare_tensor(B, "r", "t_fp_input", DTYPE, INPUT_STORAGE, is_scalar_array=False)}
${layout_declare_tensor(B, "r", "t_fp_input", DTYPE, INPUT_STORAGE)}

${layout_declare_ubo(B, "ivec4", "input_sizes")}

@@ -15,6 +15,7 @@ quantize_and_pack_q8ta_conv2d_input:
combos:
- parameter_values: [texture3d, texture3d]
- parameter_values: [buffer, texture3d]
+- parameter_values: [buffer, buffer]
DTYPE:
- VALUE: float
shader_variants:
@@ -30,7 +30,7 @@ layout(std430) buffer;

#include "conv2d_common.glslh"

${layout_declare_tensor(B, "w", "t_fp_output", DTYPE, OUTPUT_STORAGE, is_scalar_array=False)}
${layout_declare_tensor(B, "w", "t_fp_output", DTYPE, OUTPUT_STORAGE)}
${layout_declare_tensor(B, "r", "t_packed_int8_output", "int", INPUT_STORAGE, is_scalar_array=False)}

${layout_declare_ubo(B, "ivec4", "output_sizes")}
@@ -84,15 +84,29 @@ void unpack_and_dequantize(
void store_fp_output_texel(
const Conv2dTensorIndex tidx,
const VEC4_T out_texel) {
+#ifdef OUTPUT_BUFFER
+const int c_idx = mul_4(tidx.data.z);
+const int c_stride = output_sizes.y * output_sizes.x;
+
+const int base_buf_i = c_idx * c_stride + tidx.data.y * output_sizes.x + tidx.data.x;
+const int limit = min(output_sizes.z - c_idx, 4);
+
+for (int i = 0; i < limit; ++i) {
+t_fp_output[base_buf_i + i * c_stride] = out_texel[i];
+}
+#else
imageStore(t_fp_output, tidx.data, out_texel);
+#endif
}

void store_fp_tile(
const FPInputTile block,
const Conv2dBlockIndex block_idx) {
Conv2dTensorIndex store_tidx = block_idx_to_tensor_idx(block_idx);
[[unroll]] for (int w = 0; w < 4; w++) {
-store_fp_output_texel(store_tidx, block.data[w][0]);
+if (store_tidx.data.x < output_sizes.x) {
+store_fp_output_texel(store_tidx, block.data[w][0]);
+}
store_tidx.data.x++;
}
}
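The OUTPUT_BUFFER branch of store_fp_output_texel is the mirror-image scatter: the same base index and channel stride, with the write loop clamped to the channels that actually exist in the last group of 4, and the new store_fp_tile guard skipping columns of the 4-wide block that fall past output_sizes.x. Sketched in Python under the same contiguous-NCHW assumption; names are illustrative only:

# Reference sketch of store_fp_output_texel's buffer branch (illustrative).
def store_fp_output_texel_buffer(buf, W, H, C, x, y, z4, out_texel):
    c_idx = 4 * z4
    c_stride = H * W
    base = c_idx * c_stride + y * W + x
    for i in range(min(C - c_idx, 4)):   # write only channels that exist
        buf[base + i * c_stride] = out_texel[i]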
@@ -15,6 +15,7 @@ unpack_and_dequantize_q8ta_conv2d_output:
combos:
- parameter_values: [texture3d, texture3d]
- parameter_values: [texture3d, buffer]
+- parameter_values: [buffer, buffer]
DTYPE:
- VALUE: float
shader_variants:
16 changes: 11 additions & 5 deletions backends/vulkan/test/custom_ops/q8ta_q8csw_q8to_conv2d.cpp
@@ -47,11 +47,15 @@ TestCase create_test_case_from_config(
std::vector<int64_t> input_size = {
1, config.channels.in, config.input_size.h, config.input_size.w};

+utils::GPUMemoryLayout io_memory_layout = storage_type == utils::kBuffer
+? utils::kWidthPacked
+: utils::kChannelsPacked;
+
ValueSpec input_tensor(
input_size,
input_dtype,
storage_type,
-utils::kChannelsPacked,
+io_memory_layout,
DataGenType::RANDOM);

if (debugging()) {
@@ -139,7 +143,7 @@ TestCase create_test_case_from_config(
{1, config.channels.out, H_out, W_out},
input_dtype,
storage_type,
-utils::kChannelsPacked,
+io_memory_layout,
DataGenType::ZEROS);

// Add all specs to test case for q8ta_q8csw_q8to operation
@@ -182,7 +186,8 @@ std::vector<TestCase> generate_quantized_conv2d_easy_cases() {
config.op_name = "conv2d_q8ta_q8csw_q8to";

// Test with both storage types and data types for completeness
-std::vector<utils::StorageType> storage_types = {utils::kTexture3D};
+std::vector<utils::StorageType> storage_types = {
+utils::kTexture3D, utils::kBuffer};
std::vector<vkapi::ScalarType> float_types = {vkapi::kFloat};

// Generate test cases for each combination
@@ -341,7 +346,8 @@ std::vector<TestCase> generate_quantized_conv2d_test_cases() {
4}};

// Test with different storage types and data types
-std::vector<utils::StorageType> storage_types = {utils::kTexture3D};
+std::vector<utils::StorageType> storage_types = {
+utils::kTexture3D, utils::kBuffer};

// Generate test cases for each combination
for (auto& config : configs) {
@@ -621,7 +627,7 @@ int main(int argc, char* argv[]) {
quantized_conv2d_flop_calculator,
"QuantizedConv2dQ8ToQ8To",
0,
-10,
+1,
ref_fn);

return 0;
25 changes: 9 additions & 16 deletions backends/vulkan/test/custom_ops/q8ta_q8ta_q8to_add.cpp
@@ -38,21 +38,17 @@ TestCase create_quantized_add_test_case(
// Set the operator name for the test case
test_case.set_operator_name("et_vk.add_q8ta_q8ta_q8to.test");

+utils::GPUMemoryLayout io_memory_layout = storage_type == utils::kBuffer
+? utils::kWidthPacked
+: utils::kChannelsPacked;
+
// Input tensor A (float/half)
ValueSpec input_a(
-sizes,
-input_dtype,
-storage_type,
-utils::kChannelsPacked,
-DataGenType::RANDOM);
+sizes, input_dtype, storage_type, io_memory_layout, DataGenType::RANDOM);

// Input tensor B (float/half)
ValueSpec input_b(
-sizes,
-input_dtype,
-storage_type,
-utils::kChannelsPacked,
-DataGenType::RANDOM);
+sizes, input_dtype, storage_type, io_memory_layout, DataGenType::RANDOM);

// Quantization parameters for input A
float input_a_scale_val = 0.007843; // 2/255 approximately
@@ -81,11 +77,7 @@

// Output tensor (float/half)
ValueSpec output(
-sizes,
-input_dtype,
-storage_type,
-utils::kChannelsPacked,
-DataGenType::ZEROS);
+sizes, input_dtype, storage_type, io_memory_layout, DataGenType::ZEROS);

// Add all specs to test case for q8ta_q8ta_q8to add operation
test_case.add_input_spec(input_a);
@@ -119,7 +111,8 @@ std::vector<TestCase> generate_quantized_add_test_cases() {
};

// Storage types to test
-std::vector<utils::StorageType> storage_types = {utils::kTexture3D};
+std::vector<utils::StorageType> storage_types = {
+utils::kTexture3D, utils::kBuffer};

// Data types to test
std::vector<vkapi::ScalarType> data_types = {vkapi::kFloat};
8 changes: 8 additions & 0 deletions backends/vulkan/utils.py
@@ -772,6 +772,14 @@ def make_filtered_tensor_repset(
HEIGHT_PACKED_TEXTURE = TensorRepSet(set(), {VkMemoryLayout.TENSOR_HEIGHT_PACKED})
CHANNELS_PACKED_TEXTURE = TensorRepSet(set(), {VkMemoryLayout.TENSOR_CHANNELS_PACKED})

+CHANNELS_PACKED_ANY = TensorRepSet(
+{VkMemoryLayout.TENSOR_CHANNELS_PACKED}, {VkMemoryLayout.TENSOR_CHANNELS_PACKED}
+)
+
+CHANNELS_PACKED_TEXTURE_OR_CONTIGUOUS_BUFFER = TensorRepSet(
+{VkMemoryLayout.TENSOR_WIDTH_PACKED}, {VkMemoryLayout.TENSOR_CHANNELS_PACKED}
+)
+
ANY_TEXTURE = TensorRepSet(set(), all_memory_layouts)
ANY_BUFFER = TensorRepSet(all_memory_layouts, set())

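Judging from the existing constants in this hunk (CHANNELS_PACKED_TEXTURE passes an empty buffer-layout set, ANY_BUFFER an empty texture-layout set), the first TensorRepSet argument appears to be the set of valid buffer memory layouts and the second the set of valid texture memory layouts. Under that reading, the new repset admits a width-packed (contiguous) buffer or a channels-packed texture, which matches the storage/layout pairing the shaders and tests above select. A minimal sketch of that interpretation, not the real class:

from dataclasses import dataclass
from enum import Enum, auto

# Stand-ins for VkMemoryLayout and TensorRepSet; names mimic the diff but the
# classes here are illustrative only.
class MemoryLayoutSketch(Enum):
    TENSOR_WIDTH_PACKED = auto()
    TENSOR_CHANNELS_PACKED = auto()

@dataclass(frozen=True)
class TensorRepSetSketch:
    valid_buffer_layouts: frozenset
    valid_texture_layouts: frozenset

# Mirrors CHANNELS_PACKED_TEXTURE_OR_CONTIGUOUS_BUFFER above: as a buffer the
# tensor must be contiguous (width-packed), as a texture it must be
# channels-packed.
CHANNELS_PACKED_TEXTURE_OR_CONTIGUOUS_BUFFER_SKETCH = TensorRepSetSketch(
    frozenset({MemoryLayoutSketch.TENSOR_WIDTH_PACKED}),
    frozenset({MemoryLayoutSketch.TENSOR_CHANNELS_PACKED}),
)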