From 4eb61c248163562ad51951014bbd4c036a4708b9 Mon Sep 17 00:00:00 2001 From: Andrew Yin Date: Thu, 29 Jul 2021 14:37:25 -0500 Subject: [PATCH] Add scipy to requirements (#369) --- dataprofiler/profilers/categorical_column_profile.py | 11 +---------- dataprofiler/profilers/numerical_column_stats.py | 11 +---------- requirements-ml.txt | 1 - requirements.txt | 1 + 4 files changed, 3 insertions(+), 21 deletions(-) diff --git a/dataprofiler/profilers/categorical_column_profile.py b/dataprofiler/profilers/categorical_column_profile.py index 40628339c..53cdfbcea 100644 --- a/dataprofiler/profilers/categorical_column_profile.py +++ b/dataprofiler/profilers/categorical_column_profile.py @@ -3,6 +3,7 @@ from operator import itemgetter import numpy as np +import scipy.stats from . import BaseColumnProfiler from .profiler_options import CategoricalOptions @@ -175,16 +176,6 @@ def _perform_chi_squared_test(categories1, sample_size1, ** 2 / expected2 results["chi2-statistic"] = chi2_statistic - try: - import scipy.stats - except ImportError: - # Failed, so we return the stats but don't perform the test - warnings.warn("Could not import necessary statistical packages. " - "To successfully perform the chi-squared test, please run 'pip " - "install scipy.' Test results will be incomplete.", - RuntimeWarning) - return results - # Calculate p-value, i.e. P(X > chi2_statistic) p_value = 1 - scipy.stats.chi2(df).cdf(chi2_statistic) results["p-value"] = p_value diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py index aa2b5a8e0..56d269e23 100644 --- a/dataprofiler/profilers/numerical_column_stats.py +++ b/dataprofiler/profilers/numerical_column_stats.py @@ -7,6 +7,7 @@ from __future__ import print_function from __future__ import division +import scipy.stats from future.utils import with_metaclass import copy import abc @@ -370,16 +371,6 @@ def _perform_t_test(mean1, var1, n1, results['conservative']['df'] = conservative_df results['welch']['df'] = welch_df - try: - import scipy.stats - except ImportError: - # Failed, so we return the stats but don't perform the test - warnings.warn("Could not import necessary statistical packages. " - "To successfully perform the t-test, please run 'pip " - "install scipy.' T-test results will be incomplete.", - RuntimeWarning) - return results - # If scipy import was successful, now perform the *two-sided* t-test conservative_t = scipy.stats.t(conservative_df) conservative_p_val = (1 - conservative_t.cdf(abs(t))) * 2 welch_t = scipy.stats.t(welch_df) diff --git a/requirements-ml.txt b/requirements-ml.txt index f16a960c3..67d3406ae 100644 --- a/requirements-ml.txt +++ b/requirements-ml.txt @@ -1,6 +1,5 @@ scikit-learn>=0.23.2 keras>=2.4.3 -scipy>=1.4.1 tensorflow-gpu>=2.3.0; sys.platform == 'linux' tensorflow>=2.3.0; sys.platform == 'darwin' tqdm>=4.0.0 diff --git a/requirements.txt b/requirements.txt index 04dee7f8d..fc390e8f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ fastavro>=1.0.0.post1 python-snappy>=0.5.4 charset-normalizer>=1.3.6 psutil>=4.0.0 +scipy>=1.4.1