
Commit

reviewer comments
protonu committed Jan 3, 2025
1 parent ad6021c commit c90b21c
Showing 2 changed files with 42 additions and 25 deletions.
20 changes: 13 additions & 7 deletions csrc/ops/composite.cpp
@@ -79,19 +79,25 @@ TensorView* triu(TensorView* tv, Val* offset) {
   // Gives:
   //[1, 1, 1, 1]
   //[0, 1, 1, 1]
-  // If triu has an offset of k, we shift/subtract the iota of the columns by k
-  // before broadcasting and comparing with the iota of the rows.
-  auto dims = TensorDomain::noReductions(tv->getLogicalDomain()).size();
+  auto tv_logical_no_reductions =
+      TensorDomain::noReductions(tv->getLogicalDomain());
+  auto dims = tv_logical_no_reductions.size();

   auto tv_rows = iota(
-      tv->domain()->logical()[dims - 2]->extent(),
+      tv_logical_no_reductions[dims - 2]->extent(),
       IrBuilder::create<Val>(0, DataType::Index),
       IrBuilder::create<Val>(1, DataType::Index),
       DataType::Index);

+  // If triu has an offset of k, we shift/subtract the iota of the columns by k
+  // before broadcasting and comparing with the iota of the rows.
+  // So when building an iota op, instead of starting from 0 with a step of 1
+  // we start from -offset (== -k) with a step of 1.
+  auto start_shifted_by_offset = SimplifyingIrBuilder::mulExpr(
+      offset, IrBuilder::create<Val>(-1, DataType::Index));
   auto tv_columns = iota(
-      tv->domain()->logical()[dims - 1]->extent(),
-      SimplifyingIrBuilder::mulExpr(
-          offset, IrBuilder::create<Val>(-1, DataType::Index)),
+      tv_logical_no_reductions[dims - 1]->extent(),
+      start_shifted_by_offset,
       IrBuilder::create<Val>(1, DataType::Index),
       DataType::Index);

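As an aside on the shifted-iota trick in the hunk above, here is a small standalone sketch (plain C++, not nvFuser IR; the helper name triu_mask and the 2 x 4 example are illustrative only, and it assumes the mask keeps an element when column >= row, as the [1, 1, 1, 1] / [0, 1, 1, 1] comment implies). The row iota starts at 0 and the column iota starts at -offset, so comparing the two reproduces the usual triu condition j - i >= offset.

// Standalone illustration (not nvFuser IR): build a triu mask for an
// n_rows x n_cols matrix from two shifted iotas, mirroring the logic above.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<uint8_t> triu_mask(int64_t n_rows, int64_t n_cols, int64_t offset) {
  std::vector<uint8_t> mask(n_rows * n_cols);
  for (int64_t i = 0; i < n_rows; ++i) {
    for (int64_t j = 0; j < n_cols; ++j) {
      int64_t row_val = i;           // iota(extent, /*start=*/0, /*step=*/1)
      int64_t col_val = j - offset;  // iota(extent, /*start=*/-offset, /*step=*/1)
      // Keep the element when the shifted column value reaches the row value,
      // i.e. j - i >= offset, which is the usual triu condition.
      mask[i * n_cols + j] = (col_val >= row_val) ? 1 : 0;
    }
  }
  return mask;
}

int main() {
  // offset 0 on a 2 x 4 tile reproduces the pattern in the comment above:
  // [1, 1, 1, 1]
  // [0, 1, 1, 1]
  auto mask = triu_mask(2, 4, 0);
  for (int64_t i = 0; i < 2; ++i) {
    for (int64_t j = 0; j < 4; ++j) {
      std::cout << int(mask[i * 4 + j]) << (j == 3 ? '\n' : ' ');
    }
  }
  return 0;
}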
47 changes: 29 additions & 18 deletions tests/cpp/test_tensor_factories.cpp
@@ -231,30 +231,41 @@ TEST_F(TensorFactoryTest, StandaloneIota) {
 }

 TEST_F(TensorFactoryTest, SimpleTriu) {
-  std::vector<std::vector<int64_t>> input_sizes = {
-      {64, 64}, {4, 16}, {16, 4}, {16, 8, 32}};
+  std::vector<std::vector<int64_t>> input_sizes_2d = {
+      {64, 64}, {4, 16}, {16, 4}};
+  std::vector<std::vector<int64_t>> input_sizes_3d = {{16, 8, 32}};
   auto offsets = {0, 1, 2, -1, -2, 200, -200};

-  for (auto input_size : input_sizes) {
-    for (auto offset : offsets) {
-      auto fusion = std::make_unique<Fusion>();
-      FusionGuard fg(fusion.get());

-      auto tv_to_triu_on =
-          makeSymbolicTensor(input_size.size(), DataType::Half);
-      fusion->addInput(tv_to_triu_on);
+  for (auto in : {input_sizes_2d, input_sizes_3d}) {
+    auto fusion = std::make_unique<Fusion>();
+    FusionGuard fg(fusion.get());

-      auto out =
-          triu(tv_to_triu_on, IrBuilder::create<Val>(offset, DataType::Index));
-      fusion->addOutput(out);
+    auto tv_to_triu_on = makeSymbolicTensor(in.at(0).size(), DataType::Half);
+    auto input_offset = IrBuilder::create<Val>(DataType::Index);
+    auto out = triu(tv_to_triu_on, input_offset);

-      auto options = at::TensorOptions().dtype(at::kHalf).device(at::kCUDA);
-      auto in_tensor = at::randn(input_size, options);
+    fusion->addInput(tv_to_triu_on);
+    fusion->addInput(input_offset);
+    fusion->addOutput(out);

-      FusionExecutorCache executor_cache(std::move(fusion));
-      auto cg_outputs = executor_cache.runFusionWithInputs({in_tensor});
+    FusionExecutorCache executor_cache(std::move(fusion));

-      EXPECT_TRUE(at::equal(cg_outputs[0], at::triu(in_tensor, offset)));
+    for (auto input_size : in) {
+      for (auto offset : offsets) {
+        auto options = at::TensorOptions().dtype(at::kHalf).device(at::kCUDA);
+        auto in_tensor = at::randn(input_size, options);

+        auto cg_outputs =
+            executor_cache.runFusionWithInputs({in_tensor, offset});

+        testValidate(
+            executor_cache.fusion(),
+            cg_outputs,
+            {in_tensor, offset},
+            {at::triu(in_tensor, offset)},
+            __LINE__,
+            __FILE__);
+      }
     }
   }
 }
