From 4eb61c248163562ad51951014bbd4c036a4708b9 Mon Sep 17 00:00:00 2001
From: Andrew Yin <andrewyin1@gmail.com>
Date: Thu, 29 Jul 2021 14:37:25 -0500
Subject: [PATCH] Add scipy to requirements (#369)

---
 dataprofiler/profilers/categorical_column_profile.py | 11 +----------
 dataprofiler/profilers/numerical_column_stats.py     | 11 +----------
 requirements-ml.txt                                  |  1 -
 requirements.txt                                     |  1 +
 4 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/dataprofiler/profilers/categorical_column_profile.py b/dataprofiler/profilers/categorical_column_profile.py
index 40628339c..53cdfbcea 100644
--- a/dataprofiler/profilers/categorical_column_profile.py
+++ b/dataprofiler/profilers/categorical_column_profile.py
@@ -3,6 +3,7 @@
 from operator import itemgetter
 
 import numpy as np
+import scipy.stats
 
 from . import BaseColumnProfiler
 from .profiler_options import CategoricalOptions
@@ -175,16 +176,6 @@ def _perform_chi_squared_test(categories1, sample_size1,
                               ** 2 / expected2
         results["chi2-statistic"] = chi2_statistic
 
-        try:
-            import scipy.stats
-        except ImportError:
-            # Failed, so we return the stats but don't perform the test
-            warnings.warn("Could not import necessary statistical packages. "
-                          "To successfully perform the chi-squared test, please run 'pip "
-                          "install scipy.' Test results will be incomplete.",
-                          RuntimeWarning)
-            return results
-
         # Calculate p-value, i.e. P(X > chi2_statistic)
         p_value = 1 - scipy.stats.chi2(df).cdf(chi2_statistic)
         results["p-value"] = p_value
diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index aa2b5a8e0..56d269e23 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -7,6 +7,7 @@
 from __future__ import print_function
 from __future__ import division
 
+import scipy.stats
 from future.utils import with_metaclass
 import copy
 import abc
@@ -370,16 +371,6 @@ def _perform_t_test(mean1, var1, n1,
         results['conservative']['df'] = conservative_df
         results['welch']['df'] = welch_df
         
-        try:
-            import scipy.stats
-        except ImportError:
-            # Failed, so we return the stats but don't perform the test
-            warnings.warn("Could not import necessary statistical packages. "
-                          "To successfully perform the t-test, please run 'pip "
-                          "install scipy.' T-test results will be incomplete.",
-                          RuntimeWarning)
-            return results
-        # If scipy import was successful, now perform the *two-sided* t-test
         conservative_t = scipy.stats.t(conservative_df)
         conservative_p_val = (1 - conservative_t.cdf(abs(t))) * 2
         welch_t = scipy.stats.t(welch_df)
diff --git a/requirements-ml.txt b/requirements-ml.txt
index f16a960c3..67d3406ae 100644
--- a/requirements-ml.txt
+++ b/requirements-ml.txt
@@ -1,6 +1,5 @@
 scikit-learn>=0.23.2
 keras>=2.4.3
-scipy>=1.4.1
 tensorflow-gpu>=2.3.0; sys.platform == 'linux'
 tensorflow>=2.3.0; sys.platform == 'darwin'
 tqdm>=4.0.0
diff --git a/requirements.txt b/requirements.txt
index 04dee7f8d..fc390e8f1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,3 +12,4 @@ fastavro>=1.0.0.post1
 python-snappy>=0.5.4
 charset-normalizer>=1.3.6
 psutil>=4.0.0
+scipy>=1.4.1