Skip to content

Commit

Permalink
Fix
Browse files Browse the repository at this point in the history
  • Loading branch information
kasyanovse committed Sep 5, 2023
1 parent 3ab79ed commit d1982b1
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 28 deletions.
6 changes: 5 additions & 1 deletion fedot/core/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,16 +490,20 @@ def subset_indices(self, selected_idx: List):
raise IndexError(f"Next indexes are missing: {missing_values}")
return self.slice_by_index(row_nums)

def subset_features(self, features_ids: list, with_target: bool = False):
def subset_features(self, features_ids: list, with_target: bool = False, ravel_if_only_ids: bool = False):
    """Return new :obj:`InputData` with subset of features based on ``features_ids`` list

    Args:
        features_ids: column indices to keep in ``features`` (and in ``target``
            when ``with_target`` is set)
        with_target: if ``True``, the same columns are selected from ``target`` as well
        ravel_if_only_ids: if ``True`` and exactly one index is requested, the selected
            array(s) are flattened with ``np.ravel`` instead of staying single-column 2D

    Returns:
        a copy of this object whose ``features`` (and optionally ``target``) hold only
        the requested columns

    Raises:
        ValueError: if ``with_target`` is set and ``target`` has no column axis or its
            column count differs from the one of ``features``
    """
    ravel_result = ravel_if_only_ids and len(features_ids) == 1

    subsample_input = self.copy()
    subsample_input.features = self.features[:, features_ids]
    if ravel_result:
        subsample_input.features = np.ravel(subsample_input.features)

    if with_target:
        # np.shape also copes with a 1-D or missing (None) target, so those cases end up
        # in the explicit ValueError below instead of an IndexError/AttributeError
        target_shape = np.shape(self.target)
        if len(target_shape) < 2 or target_shape[1] != self.features.shape[1]:
            raise ValueError((f"Shapes of features ({self.features.shape}) and"
                              f" target ({target_shape}) mismatch. Cannot create subset for target"))
        subsample_input.target = self.target[:, features_ids]
        if ravel_result:
            subsample_input.target = np.ravel(subsample_input.target)
    return subsample_input

def shuffle(self, seed: Optional[int] = None):
Expand Down
86 changes: 59 additions & 27 deletions fedot/core/data/multi_modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,33 @@
from fedot.core.repository.tasks import Task, TaskTypesEnum


def get_from_main_input_data(item):
    """Build a getter that reads attribute ``item`` from the main input data.

    The returned function is meant to be used as a property getter on a dict-like
    container: it walks ``self.values()`` and returns ``getattr(value, item)`` of the
    first value whose ``supplementary_data.is_main_target`` is truthy. When no value
    is flagged as main target, the getter returns ``None``.
    """
    def _read_from_main_source(self, item=item):
        main_sources = (source for source in self.values()
                        if source.supplementary_data.is_main_target)
        main_source = next(main_sources, None)
        if main_source is None:
            return None
        return getattr(main_source, item)

    return _read_from_main_source


def set_to_all_input_data(item):
    """Build a setter that writes ``value`` to attribute ``item`` of every stored value.

    The returned function is meant to be used as a property setter on a dict-like
    container; it assigns the same ``value`` object to each stored element.
    """
    def _write_to_every_source(self, value, item=item):
        for source in self.values():
            setattr(source, item, value)

    return _write_to_every_source


def map_over_multimodal_data(fun):
    """Turn a stub method into one that delegates to every stored element.

    The body of ``fun`` is never executed; only its metadata (name, docstring,
    signature) is used. The produced method calls the method named ``fun.__name__``
    on each value of the container with the received arguments and stores the
    results under the same keys in a copy of the container.

    Args:
        fun: stub method whose name selects the element method to call

    Returns:
        method ``(self, *args, **kwargs)`` returning the filled copy of ``self``
    """
    # Local import keeps the helper usable without touching the module import block
    from functools import wraps

    method_name = fun.__name__

    # wraps() preserves __name__ and __doc__ as before and additionally keeps
    # __qualname__, __module__ and __wrapped__ for introspection tools
    @wraps(fun)
    def _mapped_method(self, *args, **kwargs):
        # work on a copy so the container the method is called on stays untouched
        new_self = self.copy()
        for key in self:
            new_self[key] = getattr(self[key], method_name)(*args, **kwargs)
        return new_self

    return _mapped_method


class MultiModalData(Dict[str, InputData]):
""" Dictionary with InputData as values and primary node names as keys """

Expand All @@ -22,40 +49,45 @@ def __init__(self, *arg, **kw):
# Check if input data contains different targets
self.contain_side_inputs = not all(value.supplementary_data.is_main_target for value in self.values())

# Attributes delegated to the stored input data.
# Reading returns the value of the first input data flagged as main target
# (``supplementary_data.is_main_target``); assigning ``target``, ``idx`` or ``task``
# writes the same value to every stored input data.
target = property(get_from_main_input_data('target'), set_to_all_input_data('target'))
idx = property(get_from_main_input_data('idx'), set_to_all_input_data('idx'))
task = property(get_from_main_input_data('task'), set_to_all_input_data('task'))
# The properties below are created without a setter, so they are read-only.
class_labels = property(get_from_main_input_data('class_labels'))
num_classes = property(get_from_main_input_data('num_classes'))
is_ts_forecasting = property(get_from_main_input_data('is_ts_forecasting'))
is_classification = property(get_from_main_input_data('is_classification'))
is_clustering = property(get_from_main_input_data('is_clustering'))
is_regression = property(get_from_main_input_data('is_regression'))

@property
def data_type(self):
    """List the ``data_type`` of every stored input data, in the container's iteration order."""
    return [source.data_type for source in self.values()]

def __getattr__(self, item):
    """Resolve attributes that are not found on the container itself.

    * For the names in ``main_source_fields`` the value is read from the first stored
      input data flagged as main target.
    * For the names in ``mapped_methods`` a function is returned that calls the
      same-named method on every element of a copy of the container and returns
      that copy.

    Raises:
        AttributeError: for any other name, and also for a ``main_source_fields``
            name when no stored input data is flagged as main target
    """
    main_source_fields = ('target', 'idx', 'task', 'class_labels', 'num_classes',
                          'is_ts_forecasting', 'is_classification', 'is_clustering', 'is_regression')
    mapped_methods = ('subset_range', 'subset_indices', 'slice', 'slice_by_index',
                      'convert_non_int_indexes_for_fit', 'convert_non_int_indexes_for_predict')

    if item in main_source_fields:
        for source in self.values():
            if source.supplementary_data.is_main_target:
                return getattr(source, item)
        # no main source found: fall through to the error below

    if item not in mapped_methods:
        raise AttributeError(f"Unknown attribute {item} for class MultiModalData")

    # the copy is taken when the attribute is looked up, not when the function is called
    new = self.copy()

    def _apply_to_every_source(*args, _multimodaldata=new, **kwargs):
        for key in _multimodaldata.keys():
            _multimodaldata[key] = getattr(_multimodaldata[key], item)(*args, **kwargs)
        return new

    return _apply_to_every_source

def __setattr__(self, item, value):
    """Assign ``target``, ``idx`` and ``task`` to every stored input data.

    Any other attribute is set on the container itself in the regular way.
    """
    if item not in ('target', 'idx', 'task'):
        object.__setattr__(self, item, value)
        return

    for source in self.values():
        setattr(source, item, value)
@map_over_multimodal_data
def subset_range(self, start, end):
    """Call ``subset_range`` with the given arguments on every stored input data.

    The body is intentionally empty: the decorator replaces it with the delegating
    implementation and returns a new container with the per-element results.
    """

@map_over_multimodal_data
def slice(self, start, stop, step, in_sample):
    """Call ``slice`` with the given arguments on every stored input data.

    The body is intentionally empty: the decorator replaces it with the delegating
    implementation and returns a new container with the per-element results.
    """

@map_over_multimodal_data
def slice_by_index(self, indexes, step, in_sample):
    """Call ``slice_by_index`` with the given arguments on every stored input data.

    The body is intentionally empty: the decorator replaces it with the delegating
    implementation and returns a new container with the per-element results.
    """

@map_over_multimodal_data
def subset_indices(self, selected_idx):
    """Call ``subset_indices`` with the given arguments on every stored input data.

    The body is intentionally empty: the decorator replaces it with the delegating
    implementation and returns a new container with the per-element results.
    """

@map_over_multimodal_data
def subset_features(self, features_ids, with_target=False, ravel_if_only_ids=False):
    """Call ``subset_features`` with the given arguments on every stored input data.

    The defaults mirror the ones of ``InputData.subset_features`` so the documented
    signature agrees with the method that is actually executed. The body is
    intentionally empty: the decorator replaces it with the delegating implementation
    and returns a new container with the per-element results.
    """

def copy(self):
def copy(self, copy_supplementary_data: Optional[bool] = None):
    """Return a new :obj:`MultiModalData` holding a copy of every stored input data.

    Args:
        copy_supplementary_data: when not ``None``, the value is forwarded to the
            ``copy`` method of each stored input data as the keyword argument of the
            same name; when ``None``, ``copy`` is called without arguments so that
            its own default applies
            (NOTE(review): assumes ``InputData.copy`` accepts this keyword — confirm)

    Returns:
        new container with the same keys and copied values
    """
    # Forward the value the caller actually passed (it used to be hard-coded to False,
    # which silently turned an explicit True into False)
    kwargs = {} if copy_supplementary_data is None else {'copy_supplementary_data': copy_supplementary_data}
    new = MultiModalData()
    for key, input_data in self.items():
        new[key] = input_data.copy(**kwargs)
    return new

def shuffle(self, seed: Optional[int] = None):
Expand Down
51 changes: 51 additions & 0 deletions test/unit/data/test_multimodal_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@
from fedot.core.utils import fedot_project_root


def get_multimodal_data(input_data_count=5, length=100, feature_count=5):
    """Create :obj:`MultiModalData` filled with random tabular regression sources.

    The container gets ``input_data_count`` entries keyed ``'0'``, ``'1'``, ...; each
    entry has ``length`` rows, ``feature_count`` random feature columns and a random
    one-dimensional target.
    """
    def _random_source():
        # features are drawn before target, one source at a time
        return InputData(idx=np.arange(length),
                         features=np.random.rand(length, feature_count),
                         target=np.random.rand(length),
                         data_type=DataTypesEnum.table,
                         task=Task(TaskTypesEnum.regression))

    data = MultiModalData()
    for name in map(str, range(input_data_count)):
        data[name] = _random_source()
    return data


def test_multi_modal_data():
"""
Checking basic functionality of MultiModalData class.
Expand Down Expand Up @@ -44,10 +55,20 @@ def test_multi_modal_data():
# check setter
new_target = np.asarray([1, 1, 1, 1, 1])
multi_modal.target = new_target
for input_data in multi_modal.values():
assert np.array_equal(input_data.target, new_target)
assert np.array_equal(multi_modal.target, new_target)

new_idx = np.asarray([1, 1, 1, 1, 1])
multi_modal.idx = new_idx
for input_data in multi_modal.values():
assert np.array_equal(input_data.idx, new_idx)
assert np.array_equal(multi_modal.idx, new_idx)

new_task = Task(TaskTypesEnum.regression)
multi_modal.task = new_task
for input_data in multi_modal.values():
assert input_data.task == new_task
assert multi_modal.task == new_task


Expand Down Expand Up @@ -144,3 +165,33 @@ def test_multimodal_data_with_complicated_types():
assert len(file_mm_data) == 2
assert 'data_source_text/5' in file_mm_data
assert file_mm_data['data_source_table'].features.shape == (18, 11)


@pytest.mark.parametrize(['start', 'stop', 'step'],
                         [(None, 10, None),
                          (2, 20, 3),
                          (None, -20, -2)])
def test_multimodal_slice(start, stop, step):
    """Slicing the container must equal slicing every stored input data on its own."""
    data = get_multimodal_data()
    sliced = data.slice(start, stop, step)
    for source, actual in zip(data.values(), sliced.values()):
        expected = source.slice(start, stop, step)
        for field in ('idx', 'features', 'target'):
            assert np.array_equal(getattr(expected, field), getattr(actual, field))


@pytest.mark.parametrize(['indexes', 'step'],
                         [([0, 1, 2], 1),
                          ([-3, -2, -1], 1),
                          ([-1, -2, -3], -1),
                          ([0, 3, 6], 3)
                          ])
def test_slice_by_index(indexes, step):
    """Index-based slicing of the container must equal slicing every stored input data on its own."""
    data = get_multimodal_data()
    sliced = data.slice_by_index(indexes, step)
    for source, actual in zip(data.values(), sliced.values()):
        expected = source.slice_by_index(indexes, step)
        for field in ('idx', 'features', 'target'):
            assert np.array_equal(getattr(expected, field), getattr(actual, field))

0 comments on commit d1982b1

Please sign in to comment.