Add as_tensor with provided shape method to python API (#953)
- user can cast a TensorList to a Tensor with a provided shape from the
  python API, as long as the TensorList is continuous in memory and the
  volumes match

Signed-off-by: Janusz Lisiecki <[email protected]>
JanuszL committed Jun 14, 2019
1 parent 04cade1 commit 71c499c
Showing 4 changed files with 176 additions and 28 deletions.
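
The new binding in action, as a minimal usage sketch (not part of the commit); it assumes the HybridPipe class and caffe_db_folder from the tests added below, and a pipeline that yields a batch of 8 single-element labels:

    pipe = HybridPipe(batch_size=8, num_threads=2, device_id=0)
    pipe.build()
    tl = pipe.run()[0]                         # TensorListCPU, 8 samples of shape [1]

    dense = tl.as_tensor()                     # existing API: view with shape [8, 1]
    reshaped = tl.as_reshaped_tensor([2, 4])   # new API: same memory, new shape
    assert reshaped.shape() == [2, 4]

    # A volume mismatch is rejected at call time.
    try:
        tl.as_reshaped_tensor([3, 3])          # volume 9 != 8
    except RuntimeError:
        pass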
34 changes: 33 additions & 1 deletion dali/pipeline/data/tensor.h
@@ -264,6 +264,38 @@ class Tensor : public Buffer<Backend> {
ShareData(ptr, bytes, vector<Index>());
}

/**
* @brief Wraps the data of a TensorList and gives it a new shape.
* The TensorList has to be usable as a single tensor:
* it must store at least 1 tensor,
* the volumes of the new and the old shape need to match, and
* all tensors need to be stored without
* any padding between them.
*/
inline void ShareDataReshape(TensorList<Backend> *tl, const vector<Index> &new_shape) {
DALI_ENFORCE(tl != nullptr, "Input TensorList is nullptr");
DALI_ENFORCE(tl->ntensor() > 0, "Input TensorList has 0 elements!");
DALI_ENFORCE(IsValidType(tl->type()), "To share data, "
"the input TensorList must have a valid data type.");
DALI_ENFORCE(tl->IsContinuousTensor(),
"All tensors in the input TensorList must be continuous in memory.");
Index product = 0;
for (auto &shape : tl->shape()) {
product += volume(shape);
}
DALI_ENFORCE(product == volume(new_shape),
"Requested shape needs to have the same volume as the tensor list.");
data_.reset(tl->raw_mutable_tensor(0), [](void *) {});

// Get the meta-data for the target tensor
shape_ = new_shape;
size_ = volume(shape_);
type_ = tl->type();
num_bytes_ = type_.size() * size_;
device_ = tl->device_id();
shares_data_ = true;
}

/**
* @brief Wraps a TensorList
* TensorList has to be a valid tensor
@@ -274,11 +306,11 @@ class Tensor : public Buffer<Backend> {
*/
inline void ShareData(TensorList<Backend> *tl) {
DALI_ENFORCE(tl != nullptr, "Input TensorList is nullptr");
DALI_ENFORCE(tl->ntensor() > 0, "Input TensorList has 0 elements!");
DALI_ENFORCE(IsValidType(tl->type()), "To share data, "
"the input TensorList must have a valid data type.");
DALI_ENFORCE(tl->IsDenseTensor(),
"All tensors in the input TensorList must have the same shape and be densely packed.");
DALI_ENFORCE(tl->ntensor() > 0, "Input TensorList has 0 elements!");
data_.reset(tl->raw_mutable_tensor(0), [](void *) {});

// Get the meta-data for the target tensor
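The three preconditions that ShareDataReshape enforces above (a non-empty TensorList, contiguous storage, and a matching total volume) can be mirrored in a few lines of Python. The volume helper and the element-wise offset bookkeeping below are assumptions that simply restate the C++ loop, not part of the commit:

    from functools import reduce
    import operator

    def volume(shape):
        # product of extents; an empty shape has volume 1
        return reduce(operator.mul, shape, 1)

    def can_reshape(tensor_shapes, offsets, new_shape):
        # at least one tensor, samples packed back-to-back, volume preserved
        if not tensor_shapes:
            return False
        offset = 0
        for shape, stored_offset in zip(tensor_shapes, offsets):
            if offset != stored_offset:      # padding between samples
                return False
            offset += volume(shape)
        return offset == volume(new_shape)

    assert can_reshape([[2], [3], [1]], [0, 2, 5], [3, 2])      # continuous
    assert not can_reshape([[2], [3], [1]], [0, 4, 8], [3, 2])  # padded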
105 changes: 78 additions & 27 deletions dali/pipeline/data/tensor_list.h
@@ -19,6 +19,9 @@
#include <cstring>
#include <string>
#include <vector>
#include <list>
#include <memory>
#include <utility>
#include "dali/pipeline/data/backend.h"
#include "dali/pipeline/data/buffer.h"
#include "dali/pipeline/data/meta.h"
@@ -43,12 +46,9 @@ template <typename Backend>
class DLL_PUBLIC TensorList : public Buffer<Backend> {
public:
DLL_PUBLIC TensorList()
: layout_(DALI_NHWC)
, tensor_view_(nullptr) {}
: layout_(DALI_NHWC) {}

DLL_PUBLIC ~TensorList() override {
delete tensor_view_;
}
DLL_PUBLIC ~TensorList() = default;

/**
* @brief Resizes this TensorList to match the shape of the input.
@@ -132,10 +132,8 @@ class DLL_PUBLIC TensorList : public Buffer<Backend> {
ResizeHelper(new_size);
shape_ = new_shape;

// Tensor view of this TensorList is no longer valid
if (tensor_view_) {
tensor_view_->ShareData(this);
}
// Tensor views of this TensorList are no longer valid
tensor_views_.clear();

meta_.resize(num_tensor, DALIMeta(layout_));
}
@@ -165,10 +163,8 @@ class DLL_PUBLIC TensorList : public Buffer<Backend> {
num_bytes_ = other->num_bytes_;
device_ = other->device_;

// Tensor view of this TensorList is no longer valid
if (tensor_view_) {
tensor_view_->ShareData(this);
}
// Tensor views of this TensorList are no longer valid
tensor_views_.clear();

// If the other tensor has a non-zero size allocation, mark that
// we are now sharing an allocation with another buffer
@@ -202,10 +198,8 @@ class DLL_PUBLIC TensorList : public Buffer<Backend> {
size_ = 0;
device_ = -1;

// Tensor view of this TensorList is no longer valid
if (tensor_view_) {
tensor_view_->ShareData(this);
}
// Tensor views of this TensorList are no longer valid
tensor_views_.clear();

// If the input pointer stores a non-zero size allocation, mark
// that we are sharing our underlying data
@@ -282,6 +276,26 @@ class DLL_PUBLIC TensorList : public Buffer<Backend> {
return shape_;
}

/**
* @brief Checks whether the TensorList is
* continuous. It returns true if and only if
* all of the stored Tensors are packed contiguously in memory,
* with no padding between them.
*/
inline bool IsContinuousTensor() const {
if (ntensor() == 0 || size_ == 0) {
return true;
}
Index offset = 0;

for (size_t i = 0; i < shape_.size(); ++i) {
if (offset != offsets_[i]) {
return false;
}
offset += volume(shape_[i]);
}
return true;
}

/**
* @brief Checks whether the TensorList is
* a dense Tensor. It returns true if and only if
@@ -322,18 +336,55 @@ class DLL_PUBLIC TensorList : public Buffer<Backend> {
}

/**
* @brief Returns a Tensor which shares the data
* with this TensorList. The tensor obtained
* through this function stays valid for the lifetime
* of the parent TensorList.
* @brief Returns a Tensor view with the given shape or nullptr if no
* such view exists
*/
Tensor<Backend> * AsTensor() {
if (tensor_view_ == nullptr) {
tensor_view_ = new Tensor<Backend>();
tensor_view_->ShareData(this);
inline Tensor<Backend> * GetViewWithShape(const vector<Index> &shape) {
for (auto &t : tensor_views_) {
if (t.shape() == shape) {
return &t;
}
}
return nullptr;
}

return tensor_view_;
/**
* @brief Returns a pointer to a Tensor which shares the data
* with this TensorList and gives it the provided shape.
* The TensorList owns the memory. The Tensor obtained through
* this function stays valid for as long as the TensorList data is unchanged.
*/
DLL_PUBLIC inline Tensor<Backend> * AsReshapedTensor(const vector<Index> &new_shape) {
auto t = GetViewWithShape(new_shape);
if (t) {
return t;
}

// need to create a new view
tensor_views_.emplace_back();
tensor_views_.back().ShareDataReshape(this, new_shape);

return &tensor_views_.back();
}

/**
* @brief Returns a pointer to a Tensor which shares the data
* with this TensorList. The TensorList owns the memory. The Tensor
* obtained through this function stays valid for as long
* as the TensorList data is unchanged.
*/
DLL_PUBLIC inline Tensor<Backend> * AsTensor() {
// Prevents the case where AsReshapedTensor is called first with some shape and
// AsTensor is called afterwards, which would return a view of a non-dense TensorList.
// E.g. [[2], [3], [1]] is not dense, so requesting AsReshapedTensor with [3, 2] will work,
// while AsTensor must not return a view in that case.
DALI_ENFORCE(this->IsDenseTensor(),
"All tensors in the input TensorList must have the same shape and be densely packed.");
vector<Index> requested_shape = this->tensor_shape(0);
requested_shape.insert(requested_shape.begin(), this->ntensor());

return this->AsReshapedTensor(requested_shape);
}


@@ -385,7 +436,7 @@ class DLL_PUBLIC TensorList : public Buffer<Backend> {
// when sharing data with Tensors, we store the Tensor views
// that share the data with this TensorList (each view stays
// valid until the TensorList data changes)
Tensor<Backend> * tensor_view_;
std::list<Tensor<Backend> > tensor_views_;

USE_BUFFER_MEMBERS();
};
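The dense/continuous distinction that AsTensor now guards against can be observed from Python with a variable-shaped batch. A sketch under the assumption that an ExternalSource-fed CPU batch ends up contiguous in the backend; IsContinuousTensor verifies this at call time, which is why the second call is guarded too:

    import numpy as np
    from nvidia.dali.pipeline import Pipeline
    import nvidia.dali.ops as ops

    class ExternalPipe(Pipeline):
        def __init__(self, data):
            super(ExternalPipe, self).__init__(len(data), 1, 0)
            self.source = ops.ExternalSource()
            self.data = data

        def define_graph(self):
            self.batch = self.source()
            return self.batch

        def iter_setup(self):
            self.feed_input(self.batch, self.data)

    # Samples of shapes [2], [3] and [1]: total volume 6, but not a dense batch.
    pipe = ExternalPipe([np.zeros(2, dtype=np.float32),
                         np.zeros(3, dtype=np.float32),
                         np.zeros(1, dtype=np.float32)])
    pipe.build()
    out = pipe.run()[0]

    try:
        out.as_tensor()                      # must fail: samples differ in shape
    except RuntimeError:
        pass

    try:
        t = out.as_reshaped_tensor([3, 2])   # allowed when storage is contiguous
        assert t.shape() == [3, 2]
    except RuntimeError:
        pass                                 # batch was stored with padding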
22 changes: 22 additions & 0 deletions dali/python/backend_impl.cc
@@ -318,6 +318,17 @@ void ExposeTensorList(py::module &m) { // NOLINT
Parameters
----------
)code")
.def("as_reshaped_tensor",
[](TensorList<CPUBackend> &tl, const vector<Index> &new_shape) -> Tensor<CPUBackend>* {
return tl.AsReshapedTensor(new_shape);
},
R"code(
Returns a tensor that is a view of this `TensorList` cast to the given shape.
This function can only be called if `TensorList` is continuous in memory and
the volumes of the requested `Tensor` and the `TensorList` match.
)code",
py::return_value_policy::reference_internal)
.def("as_tensor", &TensorList<CPUBackend>::AsTensor,
R"code(
Returns a tensor that is a view of this `TensorList`.
@@ -396,6 +407,17 @@ void ExposeTensorList(py::module &m) { // NOLINT
----------
)code",
py::keep_alive<0, 1>())
.def("as_reshaped_tensor",
[](TensorList<GPUBackend> &tl, const vector<Index> &new_shape) -> Tensor<GPUBackend>* {
return tl.AsReshapedTensor(new_shape);
},
R"code(
Returns a tensor that is a view of this `TensorList` cast to the given shape.
This function can only be called if `TensorList` is continuous in memory and
the volumes of the requested `Tensor` and the `TensorList` match.
)code",
py::return_value_policy::reference_internal)
.def("as_tensor", &TensorList<GPUBackend>::AsTensor,
R"code(
Returns a tensor that is a view of this `TensorList`.
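The GPU binding mirrors the CPU one; the returned Tensor is a zero-copy view of device memory. A sketch, assuming labels can be moved to the GPU with .gpu() in define_graph and that TensorGPU exposes the same shape() accessor as its CPU counterpart:

    class GpuLabelPipe(Pipeline):
        def __init__(self, batch_size, num_threads, device_id):
            super(GpuLabelPipe, self).__init__(batch_size, num_threads, device_id, seed = 12)
            self.input = ops.CaffeReader(path = caffe_db_folder, random_shuffle = True)

        def define_graph(self):
            _, labels = self.input()
            return labels.gpu()

    pipe = GpuLabelPipe(batch_size=8, num_threads=2, device_id=0)
    pipe.build()
    tl_gpu = pipe.run()[0]                      # TensorListGPU
    t_gpu = tl_gpu.as_reshaped_tensor([4, 2])   # TensorGPU view, no copy
    assert t_gpu.shape() == [4, 2]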
43 changes: 43 additions & 0 deletions dali/test/python/test_pipeline.py
@@ -24,6 +24,7 @@
import numpy as np
from numpy.testing import assert_array_equal, assert_allclose
import os
import random

from test_utils import check_batch
from test_utils import compare_pipelines
@@ -1394,3 +1395,45 @@ def test_skip_cached_images():
    compare_pipelines(CachedPipeline(reader_type, batch_size, is_cached=False),
                      CachedPipeline(reader_type, batch_size, is_cached=True, skip_cached_images=True),
                      batch_size=batch_size, N_iterations=100)

def test_as_tensor():
    class HybridPipe(Pipeline):
        def __init__(self, batch_size, num_threads, device_id):
            super(HybridPipe, self).__init__(batch_size, num_threads, device_id, seed = 12)
            self.input = ops.CaffeReader(path = caffe_db_folder, random_shuffle = True)

        def define_graph(self):
            _, self.labels = self.input()
            return self.labels

    batch_size = 8
    shape = [[2, 2, 2], [8, 1], [1, 8], [4, 2], [2, 4], [8], [1, 2, 1, 2, 1, 2], [1, 1, 1, 8]]
    pipe = HybridPipe(batch_size=batch_size, num_threads=2, device_id = 0)
    pipe.build()
    for sh in shape:
        pipe_out = pipe.run()[0]
        assert(pipe_out.as_tensor().shape() == [batch_size, 1])
        assert(pipe_out.as_reshaped_tensor(sh).shape() == sh)
        different_shape = random.choice(shape)
        assert(pipe_out.as_reshaped_tensor(different_shape).shape() == different_shape)

def test_as_tensor_fail():
    class HybridPipe(Pipeline):
        def __init__(self, batch_size, num_threads, device_id):
            super(HybridPipe, self).__init__(batch_size, num_threads, device_id, seed = 12)
            self.input = ops.CaffeReader(path = caffe_db_folder, random_shuffle = True)

        def define_graph(self):
            _, self.labels = self.input()
            return self.labels

    batch_size = 8
    shape = [[2, 2, 2, 3], [8, 1, 6], [1, 8, 4], [4, 2, 9], [2, 4, 0], [8, 2], [1, 2, 1, 2, 1, 2, 3], [7, 1, 1, 1, 8]]
    pipe = HybridPipe(batch_size=batch_size, num_threads=2, device_id = 0)
    pipe.build()
    for sh in shape:
        pipe_out = pipe.run()[0]
        assert(pipe_out.as_tensor().shape() == [batch_size, 1])
        try:
            assert(pipe_out.as_reshaped_tensor(sh).shape() == sh)
            assert(False)
        except RuntimeError:
            assert(True)
