diff --git a/cases/spam_detection.py b/cases/spam_detection.py index 5ec26a2ee2..42f593fda5 100644 --- a/cases/spam_detection.py +++ b/cases/spam_detection.py @@ -7,6 +7,7 @@ from fedot.core.data.data_split import train_test_data_setup from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.repository.dataset_types import DataTypesEnum def execute_pipeline_for_text_problem(train_data, test_data): @@ -26,7 +27,8 @@ def execute_pipeline_for_text_problem(train_data, test_data): def run_text_problem_from_meta_file(): data_file_abspath = os.path.abspath(os.path.join('data', 'spam', 'spamham.csv')) - data = InputData.from_text_meta_file(meta_file_path=data_file_abspath) + data = InputData.from_csv(file_path=data_file_abspath, + data_type=DataTypesEnum.text) train_data, test_data = train_test_data_setup(data, split_ratio=0.7) @@ -52,7 +54,8 @@ def run_text_problem_from_files(): def run_text_problem_from_saved_meta_file(path): - data = InputData.from_text_meta_file(meta_file_path=path) + data = InputData.from_csv(file_path=path, + data_type=DataTypesEnum.text) train_data, test_data = train_test_data_setup(data, split_ratio=0.7) diff --git a/docs/source/advanced/cli_call.rst b/docs/source/advanced/cli_call.rst index 281f91e8f8..8293610967 100644 --- a/docs/source/advanced/cli_call.rst +++ b/docs/source/advanced/cli_call.rst @@ -69,4 +69,4 @@ problems decision are presented. The string below helps to run classification problem decision from the console: -``python --problem classification --train ../../test/data/simple_classification.csv --test ../../test/data/simple_classification.csv --target Y --timeout 0.1`` +``python --problem classification --train ../../test/data/classification/simple_classification.csv --test ../../test/data/classification/simple_classification.csv --target Y --timeout 0.1`` diff --git a/examples/simple/cli_application/cli_classification_call.bat b/examples/simple/cli_application/cli_classification_call.bat index 1b69b74391..5fe7cd973d 100644 --- a/examples/simple/cli_application/cli_classification_call.bat +++ b/examples/simple/cli_application/cli_classification_call.bat @@ -1,3 +1,3 @@ set python_path = "DEFAULT" cd ../../fedot/api -%python_path% fedot_cli.py --problem classification --train ../../test/data/simple_classification.csv --test ../../test/data/simple_classification.csv --target Y --timeout 0.1 +%python_path% fedot_cli.py --problem classification --train ../../test/data/classification/simple_classification.csv --test ../../test/data/classification/simple_classification.csv --target Y --timeout 0.1 diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 3719806f58..886a75a70d 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -356,26 +356,6 @@ def from_image(images: Union[str, np.ndarray] = None, return InputData(idx=idx, features=features, target=target, task=task, data_type=DataTypesEnum.image) - @staticmethod - def from_text_meta_file(meta_file_path: str = None, - label: str = 'label', - task: Task = Task(TaskTypesEnum.classification), - data_type: DataTypesEnum = DataTypesEnum.text) -> InputData: - - if os.path.isdir(meta_file_path): - raise ValueError("""CSV file expected but got directory""") - - df_text = pd.read_csv(meta_file_path) - df_text = df_text.sample(frac=1).reset_index(drop=True) - messages = df_text['text'].astype('U').tolist() - - features = np.array(messages) - target = np.array(df_text[label]).reshape(-1, 1) - idx = [index for index in range(len(target))] - - return InputData(idx=idx, features=features, - target=target, task=task, data_type=data_type) - @staticmethod def from_text_files(files_path: str, label: str = 'label', diff --git a/test/data/multiclass_classification.csv b/test/data/classification/multiclass_classification.csv similarity index 100% rename from test/data/multiclass_classification.csv rename to test/data/classification/multiclass_classification.csv diff --git a/test/data/classification/multiclass_classification.npy b/test/data/classification/multiclass_classification.npy new file mode 100644 index 0000000000..343530cf17 Binary files /dev/null and b/test/data/classification/multiclass_classification.npy differ diff --git a/test/data/simple_classification.csv b/test/data/classification/simple_classification.csv similarity index 100% rename from test/data/simple_classification.csv rename to test/data/classification/simple_classification.csv diff --git a/test/data/classification/simple_classification.npy b/test/data/classification/simple_classification.npy new file mode 100644 index 0000000000..c8fb4d3017 Binary files /dev/null and b/test/data/classification/simple_classification.npy differ diff --git a/test/data/regression/simple_regression.csv b/test/data/regression/simple_regression.csv new file mode 100644 index 0000000000..224c38a884 --- /dev/null +++ b/test/data/regression/simple_regression.csv @@ -0,0 +1,62 @@ +Unnamed: 0,Asphalt,N-par,Iso-par,AROM UV,P,V,S,target +41,0.21,71.09,18.5,2.13,839.3,13.44,0.09,0.223 +8,1.2,63.47,20.44,7.53,891.8,42.49,0.61,0.168 +43,2.37,61.27,20.41,9.0,906.8,62.9,1.01,0.11 +21,4.16,59.83,19.72,9.79,916.5,81.55,1.55,0.07 +27,1.26,68.27,18.77,4.04,858.6,19.59,0.47,0.99 +51,0.98,68.44,18.9,3.96,857.5,19.19,0.39,1.75 +20,4.24,59.62,19.75,9.93,917.9,85.16,1.58,0.05 +42,2.3,59.63,21.0,10.17,918.2,87.75,1.04,0.03 +24,3.29,62.36,19.44,8.07,899.1,50.44,1.23,0.34 +53,1.59,65.0,19.66,4.11,881.2,32.51,0.67,1.21 +59,2.93,60.93,20.16,4.03,909.1,66.68,1.18,0.242 +10,1.05,64.52,20.18,6.78,884.5,35.43,0.53,0.196 +45,2.51,64.44,19.24,6.66,884.9,35.24,0.94,1.03 +22,4.09,60.05,19.7,9.64,915.0,78.14,1.53,0.102 +60,3.45,60.61,19.93,2.43,911.2,70.47,1.33,0.252 +14,0.22,70.36,18.74,2.66,844.3,14.71,0.11,0.239 +29,0.31,71.04,18.45,2.16,839.6,13.53,0.12,0.35 +55,3.28,60.71,20.0,2.96,910.5,69.17,1.28,0.223 +32,2.07,61.45,20.55,8.88,905.5,60.93,0.92,0.03 +6,1.31,62.75,20.61,8.03,896.7,48.26,0.66,0.072 +57,1.86,61.6,20.65,7.35,904.7,59.6,0.85,0.087 +37,1.76,64.89,19.59,6.43,881.9,33.0,0.72,0.618 +54,1.63,66.53,19.11,2.03,870.5,25.36,0.63,2.06 +33,2.97,60.06,20.43,9.77,915.0,79.26,1.21,0.123 +38,1.3,66.73,19.26,5.15,869.2,24.7,0.53,0.874 +12,0.69,67.1,19.54,4.97,866.8,23.53,0.35,0.465 +58,2.37,61.27,20.41,5.76,906.8,62.86,1.01,0.178 +28,0.62,70.13,18.55,2.78,845.8,15.21,0.23,0.68 +31,2.21,60.53,20.77,9.53,911.9,72.93,0.99,0.02 +46,2.59,64.39,19.21,6.68,885.2,35.5,0.97,1.726 +25,2.59,64.39,19.21,6.68,885.2,35.5,0.97,1.726 +7,1.28,62.93,20.57,7.91,895.5,46.73,0.65,0.134 +48,0.72,62.53,24.37,5.1,877.2,28.25,0.37,0.28 +9,1.18,63.64,20.39,7.4,890.6,41.19,0.6,0.169 +39,0.85,68.51,18.95,3.92,859.0,19.0,0.35,0.59 +52,1.51,63.44,20.24,7.51,891.9,42.51,0.7,0.341 +49,0.93,65.39,19.96,6.17,878.5,30.7,0.47,0.452 +11,0.93,65.39,19.96,6.17,878.5,30.7,0.47,0.452 +18,4.39,59.19,19.8,10.23,920.9,93.0,1.64,0.03 +35,2.47,62.04,20.09,8.4,901.5,54.18,1.01,0.322 +47,0.85,58.08,28.54,5.7,890.6,37.14,0.43,0.19 +4,1.36,62.39,20.7,8.29,899.2,51.54,0.69,0.019 +15,0.11,71.15,18.54,2.1,838.9,13.36,0.06,0.108 +36,2.23,63.0,19.92,7.73,894.8,45.5,0.91,0.65 +1,1.97,58.1,21.76,11.32,928.7,122.8,0.99,0.02 +56,3.17,59.1,20.63,5.42,921.8,97.04,1.3,0.02 +17,4.76,58.09,19.92,10.97,928.4,117.2,1.78,0.02 +23,4.02,60.26,19.68,9.5,913.5,74.9,1.5,0.119 +0,2.24,56.15,22.24,12.7,942.2,193.1,1.14,0.02 +3,1.43,61.85,20.84,8.67,902.9,57.04,0.73,0.01 +34,2.72,61.06,20.26,9.08,908.2,65.18,1.11,0.123 +50,0.93,66.95,19.43,5.04,867.8,24.0,0.42,0.66 +30,2.35,59.6,21.0,10.19,918.3,88.19,1.05,0.02 +44,2.44,62.88,19.82,7.8,895.7,46.43,0.98,0.62 +13,0.45,68.75,19.14,3.8,855.4,18.47,0.23,0.39 +26,1.92,66.36,18.98,5.34,871.7,25.95,0.72,2.657 +40,0.42,70.24,18.65,2.72,845.1,15.0,0.17,0.51 +19,4.31,59.4,19.77,10.08,919.4,89.0,1.61,0.04 +2,1.7,60.0,21.29,9.98,915.6,81.93,0.86,0.015 +16,5.53,55.86,20.18,12.5,943.7,194.9,2.07,0.02 +5,1.33,62.57,20.66,8.16,897.9,49.87,0.67,0.047 diff --git a/test/data/regression/simple_regression.npy b/test/data/regression/simple_regression.npy new file mode 100644 index 0000000000..41447758b4 Binary files /dev/null and b/test/data/regression/simple_regression.npy differ diff --git a/test/integration/api/test_api_cli_params.py b/test/integration/api/test_api_cli_params.py index 631456afcc..42694169f3 100644 --- a/test/integration/api/test_api_cli_params.py +++ b/test/integration/api/test_api_cli_params.py @@ -26,7 +26,7 @@ def test_cli_with_parameters(): f'--cv_folds 2 --target sea_height --train {ts_train_path} ' f'--test {ts_train_path} --for_len 10' ).split() - class_train_path = project_root_path.joinpath('test/data/simple_classification.csv') + class_train_path = project_root_path.joinpath('test/data/classification/simple_classification.csv') class_call = ( f'--problem classification --train {class_train_path} --test {class_train_path} --target Y ' '--preset fast_train --timeout 0.1 --depth 3 --arity 3 ' diff --git a/test/integration/api/test_api_utils.py b/test/integration/api/test_api_utils.py index ac23a25339..a47f260838 100644 --- a/test/integration/api/test_api_utils.py +++ b/test/integration/api/test_api_utils.py @@ -14,7 +14,7 @@ from fedot.preprocessing.preprocessing import DataPreprocessor from test.data.datasets import get_cholesterol_dataset from test.integration.api.test_main_api import get_dataset -from test.unit.tasks.test_classification import get_binary_classification_data +from test.unit.tasks.test_classification import get_binary_classification_data_from_csv def test_compose_fedot_model_without_tuning(): @@ -32,7 +32,7 @@ def test_output_binary_classification_correct(): task_type = 'classification' - data = get_binary_classification_data() + data = get_binary_classification_data_from_csv() train_data, test_data = train_test_data_setup(data, shuffle=True) diff --git a/test/integration/composer/test_history.py b/test/integration/composer/test_history.py index c266ba7539..71a7e61ff0 100644 --- a/test/integration/composer/test_history.py +++ b/test/integration/composer/test_history.py @@ -68,7 +68,7 @@ def _test_individuals_in_history(history: OptHistory): @pytest.mark.parametrize('n_jobs', [1, 2]) def test_newly_generated_history(n_jobs: int): - file_path_train = fedot_project_root().joinpath('test/data/simple_classification.csv') + file_path_train = fedot_project_root().joinpath('test/data/classification/simple_classification.csv') num_of_gens = 2 auto_model = Fedot(problem='classification', seed=42, diff --git a/test/integration/pipelines/tuning/test_pipeline_tuning.py b/test/integration/pipelines/tuning/test_pipeline_tuning.py index 48510f0339..4386ea343f 100644 --- a/test/integration/pipelines/tuning/test_pipeline_tuning.py +++ b/test/integration/pipelines/tuning/test_pipeline_tuning.py @@ -40,7 +40,7 @@ def regression_dataset(): @pytest.fixture() def classification_dataset(): test_file_path = str(os.path.dirname(__file__)) - file = os.path.join(str(fedot_project_root()), 'test/data/simple_classification.csv') + file = os.path.join(str(fedot_project_root()), 'test/data/classification/simple_classification.csv') return InputData.from_csv(os.path.join(test_file_path, file), task=Task(TaskTypesEnum.classification)) diff --git a/test/integration/real_applications/test_real_cases.py b/test/integration/real_applications/test_real_cases.py index 57e7e3f343..4ce5c09c99 100644 --- a/test/integration/real_applications/test_real_cases.py +++ b/test/integration/real_applications/test_real_cases.py @@ -13,7 +13,8 @@ def test_credit_scoring_problem(): - full_path_train = full_path_test = fedot_project_root().joinpath('test/data/simple_classification.csv') + full_path_train = full_path_test = \ + fedot_project_root().joinpath('test/data/classification/simple_classification.csv') roc_auc_test = run_credit_scoring_problem(full_path_train, full_path_test, timeout=5, target='Y', n_jobs=1) assert roc_auc_test > 0.5 diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py index 3f23fb289f..0dd5bd1214 100644 --- a/test/unit/data/test_data.py +++ b/test/unit/data/test_data.py @@ -48,7 +48,7 @@ def test_data_subset_incorrect(data_setup): def test_data_from_csv(): test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' + file = '../../data/classification/simple_classification.csv' task = Task(TaskTypesEnum.classification) df = pd.read_csv(os.path.join(test_file_path, file)) data_array = np.array(df).T @@ -71,7 +71,7 @@ def test_data_from_csv(): def test_with_custom_target(): test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' + file = '../../data/classification/simple_classification.csv' file_custom = '../../data/simple_classification_with_custom_target.csv' file_data = InputData.from_csv( @@ -140,7 +140,7 @@ def test_target_data_from_csv_correct(): def test_table_data_shuffle(): test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' + file = '../../data/classification/simple_classification.csv' data = InputData.from_csv(os.path.join(test_file_path, file)) shuffled_data = deepcopy(data) diff --git a/test/unit/data/test_multimodal_data.py b/test/unit/data/test_multimodal_data.py index efc6abf57b..3c77150430 100644 --- a/test/unit/data/test_multimodal_data.py +++ b/test/unit/data/test_multimodal_data.py @@ -106,7 +106,7 @@ def test_text_data_only(data_type): data_source_name = 'data_source_text/description' elif data_type is DataTypesEnum.table: # Case when there is no text data in csv, but MultiModalData.from_csv() is used - file_path = 'test/data/simple_classification.csv' + file_path = 'test/data/classification/simple_classification.csv' data_source_name = 'data_source_table' path = Path(fedot_project_root(), file_path) diff --git a/test/unit/optimizer/gp_operators/test_mutation.py b/test/unit/optimizer/gp_operators/test_mutation.py index d090c47477..72007adbe2 100644 --- a/test/unit/optimizer/gp_operators/test_mutation.py +++ b/test/unit/optimizer/gp_operators/test_mutation.py @@ -33,7 +33,7 @@ def get_requirements_and_params_for_task(task: TaskTypesEnum): def file_data(): - test_file_path = Path(__file__).parents[3].joinpath('data', 'simple_classification.csv') + test_file_path = Path(__file__).parents[3].joinpath('data', 'classification', 'simple_classification.csv') input_data = InputData.from_csv(test_file_path) input_data.idx = to_categorical_codes(categorical_ids=input_data.idx) return input_data diff --git a/test/unit/pipelines/test_pipeline.py b/test/unit/pipelines/test_pipeline.py index 21eede0588..17bad5799f 100644 --- a/test/unit/pipelines/test_pipeline.py +++ b/test/unit/pipelines/test_pipeline.py @@ -47,7 +47,7 @@ def classification_dataset(): @pytest.fixture() def file_data_setup(): test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' + file = '../../data/classification/simple_classification.csv' input_data = InputData.from_csv( os.path.join(test_file_path, file)) input_data.idx = to_categorical_codes(categorical_ids=input_data.idx) diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py index 7373f758be..a7c23a9542 100644 --- a/test/unit/tasks/test_classification.py +++ b/test/unit/tasks/test_classification.py @@ -1,8 +1,10 @@ import os import numpy as np +import pandas as pd +import pytest from sklearn.datasets import load_iris, make_classification -from sklearn.metrics import roc_auc_score as roc_auc +from sklearn.metrics import roc_auc_score as roc_auc, f1_score as f1 from examples.simple.classification.image_classification_problem import run_image_classification_problem from fedot.core.data.data import InputData @@ -37,6 +39,13 @@ def pipeline_with_pca() -> Pipeline: return pipeline +def simple_text_pipeline() -> Pipeline: + node_tfidf = PipelineNode('tfidf') + model_node = PipelineNode('logit', nodes_from=[node_tfidf]) + pipeline = Pipeline(model_node) + return pipeline + + def get_synthetic_classification_data(n_samples=1000, n_features=10, random_state=None) -> InputData: synthetic_data = make_classification(n_samples=n_samples, n_features=n_features, random_state=random_state) input_data = InputData(idx=np.arange(0, len(synthetic_data[1])), @@ -60,12 +69,26 @@ def get_iris_data() -> InputData: return input_data -def get_binary_classification_data(): +def get_classification_data(source: str, problem: str) -> InputData: test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' - input_data = InputData.from_csv( - os.path.join(test_file_path, file)) - return input_data + if source == 'numpy': + file = f'../../data/classification/{problem}_classification.npy' + numpy_data = np.load(os.path.join(test_file_path, file)) + features_array = numpy_data[:, :-1] + target_array = numpy_data[:, -1] + return InputData.from_numpy(features_array=features_array, + target_array=target_array) + elif source == 'dataframe': + file = f'../../data/classification/{problem}_classification.csv' + df_data = pd.read_csv(os.path.join(test_file_path, file)) + features_df = df_data.iloc[:, :-1] + target_df = df_data.iloc[:, -1] + return InputData.from_dataframe(features_df=features_df, + target_df=target_df) + elif source == 'csv': + file = f'../../data/classification/{problem}_classification.csv' + return InputData.from_csv( + os.path.join(test_file_path, file)) def get_image_classification_data(composite_flag: bool = True): @@ -96,8 +119,32 @@ def get_image_classification_data(composite_flag: bool = True): return roc_auc_on_valid, dataset_to_train, dataset_to_validate -def test_multiclassification_pipeline_fit_correct(): - data = get_iris_data() +CLASSIFICATION_DATA_SOURCES = ['numpy', + 'dataframe', + 'csv', + # 'from_text_files', + # 'from_json_files', + ] + + +@pytest.mark.parametrize('source', CLASSIFICATION_DATA_SOURCES) +def test_binary_classification_pipeline_fit_correct(source: str): + data = get_classification_data(source, 'simple') + pipeline = pipeline_simple() + train_data, test_data = train_test_data_setup(data, shuffle=True) + + pipeline.fit(input_data=train_data) + results = pipeline.predict(input_data=test_data) + + roc_auc_on_test = roc_auc(y_true=test_data.target, + y_score=results.predict) + + assert roc_auc_on_test > 0.8 + + +@pytest.mark.parametrize('source', CLASSIFICATION_DATA_SOURCES) +def test_multiclassification_pipeline_fit_correct(source: str): + data = get_classification_data(source, 'multiclass') pipeline = pipeline_simple() train_data, test_data = train_test_data_setup(data, shuffle=True) @@ -106,7 +153,7 @@ def test_multiclassification_pipeline_fit_correct(): roc_auc_on_test = roc_auc(y_true=test_data.target, y_score=results.predict, - multi_class='ovo', + multi_class='ovr', # TODO: strange bug when ovo is chosen average='macro') assert roc_auc_on_test > 0.95 @@ -154,7 +201,7 @@ def test_output_mode_labels(): def test_output_mode_full_probs(): - data = get_binary_classification_data() + data = get_classification_data('csv', 'simple') pipeline = pipeline_simple() train_data, test_data = train_test_data_setup(data, shuffle=True) @@ -167,3 +214,27 @@ def test_output_mode_full_probs(): assert np.array_equal(results_probs.predict, results_default.predict) assert results.predict.shape == (len(test_data.target), 2) assert results_probs.predict.shape == (len(test_data.target), 1) + + +def test_image_pipeline_fit_correct(): + roc_auc_on_valid, _, _ = get_image_classification_data() + + assert roc_auc_on_valid >= 0.5 + + +def test_text_classification_pipeline_fit_correct(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/simple_multimodal_classification_text.csv' + data = InputData.from_csv(file_path=os.path.join(test_file_path, file), + data_type=DataTypesEnum.text) + pipeline = simple_text_pipeline() + train_data, test_data = train_test_data_setup(data, shuffle=True) + + pipeline.fit(input_data=train_data) + results = pipeline.predict(input_data=test_data, output_mode='labels') + + f1_on_test = f1(y_true=test_data.target, + y_pred=results.predict, + average='micro') + + assert f1_on_test >= 0.5 diff --git a/test/unit/tasks/test_regression.py b/test/unit/tasks/test_regression.py index 7489d1c5b6..8b31282982 100644 --- a/test/unit/tasks/test_regression.py +++ b/test/unit/tasks/test_regression.py @@ -1,5 +1,9 @@ +import os + import numpy as np +import pandas as pd import pytest +from typing import Callable from sklearn.datasets import make_regression from sklearn.metrics import mean_squared_error as mse @@ -52,6 +56,31 @@ def get_synthetic_regression_data(n_samples=1000, n_features=10, random_state=No return input_data +def get_regression_data(source: str) -> InputData: + test_file_path = str(os.path.dirname(__file__)) + if source == 'numpy': + file = '../../data/regression/simple_regression.npy' + numpy_data = np.load(os.path.join(test_file_path, file)) + features_array = numpy_data[:, :-1] + target_array = numpy_data[:, -1] + return InputData.from_numpy(features_array=features_array, + target_array=target_array, + task='regression') + elif source == 'dataframe': + file = '../../data/regression/simple_regression.csv' + df_data = pd.read_csv(os.path.join(test_file_path, file)) + features_df = df_data.iloc[:, :-1] + target_df = df_data.iloc[:, -1] + return InputData.from_dataframe(features_df=features_df, + target_df=target_df, + task='regression') + elif source == 'csv': + file = '../../data/regression/simple_regression.csv' + return InputData.from_csv( + os.path.join(test_file_path, file), + task='regression') + + def get_rmse_value(pipeline: Pipeline, train_data: InputData, test_data: InputData) -> (float, float): train_pred = pipeline.predict(input_data=train_data) test_pred = pipeline.predict(input_data=test_data) @@ -61,7 +90,32 @@ def get_rmse_value(pipeline: Pipeline, train_data: InputData, test_data: InputDa return rmse_value_train, rmse_value_test -def test_regression_pipeline_fit_predict_correct(): +REGRESSION_DATA_SOURCES = ['numpy', + 'dataframe', + 'csv', + # 'from_text_meta_file', + # 'from_text_files', + # 'from_json_files', + ] + + +@pytest.mark.parametrize('source', REGRESSION_DATA_SOURCES) +def test_regression_pipeline_fit_predict_correct(source: str): + data = get_regression_data(source) + pipeline = generate_pipeline() + train_data, test_data = train_test_data_setup(data, shuffle=True) + + pipeline.fit(input_data=train_data) + results = pipeline.predict(input_data=test_data) + + rmse_on_test = mse(y_true=test_data.target, + y_pred=results.predict, + squared=False) + + assert rmse_on_test < 0.8 + + +def test_synthetic_regression_pipeline_fit_predict_correct(): data = get_synthetic_regression_data() pipeline = generate_pipeline() diff --git a/test/unit/validation/test_table_cv.py b/test/unit/validation/test_table_cv.py index 8ec6ce1a16..5f469e298e 100644 --- a/test/unit/validation/test_table_cv.py +++ b/test/unit/validation/test_table_cv.py @@ -31,7 +31,7 @@ def sample_pipeline(): def get_classification_data(): - file_path = fedot_project_root().joinpath('test/data/simple_classification.csv') + file_path = fedot_project_root().joinpath('test/data/classification/simple_classification.csv') input_data = InputData.from_csv(file_path, task=Task(TaskTypesEnum.classification)) return input_data