From 2ff709c0f3d40c15e43afcd84e94edae0827b4da Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 19 Sep 2023 13:18:48 -0400
Subject: [PATCH 01/34] Create PSI Detector.py

PSI Drift Detector
---
 PSI Detector.py | 124 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 PSI Detector.py

diff --git a/PSI Detector.py b/PSI Detector.py
new file mode 100644
index 00000000..fd6ebac9
--- /dev/null
+++ b/PSI Detector.py	
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[57]:
+
+
+from detector import *
+class PSI_Detector(BatchDetector):
+    
+    input_type = "batch"
+
+    def __init__(
+        self,
+        
+    ):
+
+        # This initializes batch detector's parent class
+        super().__init__() 
+        
+        # Initialize any parameters to be used in this algorithm
+
+
+    def set_reference(self, X, y_true=None, y_pred=None):
+       
+        # leave this, it uses the parent class to validate input
+        # and sets the self.reference variable to refer to the reference dataset 
+     
+        X, _, _ = super()._validate_input(X, None, None)
+        X = pd.DataFrame(
+            X, columns=self._input_cols
+        )  
+
+        # Initialize reference dataset
+        self.reference = copy.deepcopy(X)
+
+        self.reset() 
+        
+        
+    def reset(self):
+
+
+        super().reset()
+
+        
+    def update(self, X, by_feature=True, X_by_feature=None, y_true=None, y_pred=None):
+        
+        # this function will update the detector with the test batch 
+        eps = 1e-4
+        # create a variable to store psi values for each feature 
+        feature_psi = []
+        o = pd.DataFrame(
+            columns=["Column", 'Moderate population change', 'Significant population change',"PSI"])
+        z = 0
+        # 1. iterate through each feature in reference and test data, identify minimum and maximum value 
+        for column in self.reference.columns:
+            min_val = min(min(self.reference[column]), min(X[column]))
+            max_val = max(max(self.reference[column]), max(X[column]))
+            
+             # 2. use _bin_data function to bucketize reference, append to reference buckets array
+            bins = self._bin_data(self.reference[column],min_val,max_val)
+            bins_initial = pd.cut(self.reference[column], bins = bins, labels = range(1,len(bins)))
+            df_initial = pd.DataFrame({'initial': self.reference[column], 'bin': bins_initial})
+            grp_initial = df_initial.groupby('bin').count()
+            grp_initial['percent_initial'] = grp_initial['initial'] / sum(grp_initial['initial'])
+             # 3. use _bin_data function to bucketize test, append to reference test array 
+            bins_new = pd.cut(X[column], bins = bins, labels = range(1,len(bins)))
+            df_new = pd.DataFrame({'new': X[column], 'bin': bins_new})
+            grp_new = df_new.groupby('bin').count()
+            grp_new['percent_new'] = grp_new['new'] / sum(grp_new['new'])
+            # 4. Call PSI function to calculate PSI on test and reference bucket representation,
+            psi_value = self._PSI(grp_initial,grp_new)
+            feature_psi.append([column,psi_value])
+            # store PSI for each feature in feature_psi array 
+            if psi_value >= 0.1 and psi_value <= 0.2:
+                o.loc[z] = [column,'Yes','No',psi_value]
+                z += 1
+            elif psi_value > 0.2:
+                o.loc[z] = [column,'No','Yes',psi_value]
+                z += 1
+        # 5. Aggregate PSI values to determine if dataset is drifting
+        if o.any()['Column'] == True:
+            self.drift_state == 'drift'
+            return feature_psi, o
+        else:
+            return 'no drift detected',feature_psi
+        # If PSI indicates drift, set self.drift_state == 'drift'
+        
+        # Create a dictionary to store if each individual feature is drifting 
+
+        # Update self.reference dataset to refer to test data, X 
+        self.reference = X
+
+        
+
+    def _bin_data(self, feature, min, max):
+        eps = 1e-4
+        if len(feature.unique()) < 10:
+            bins = [min + (max - min)*(i)/len(feature.unique()) for i in range(len(feature.unique())+1)]
+            bins[0] = min - eps # Correct the lower boundary
+            bins[-1] = max + eps # Correct the higher boundary
+            return bins
+        else:
+            bins = [min + (max - min)*(i)/10 for i in range(10+1)]
+            bins[0] = min - eps # Correct the lower boundary
+            bins[-1] = max + eps # Correct the higher boundary
+            return bins
+            # return an array containing the sample counts within each bucket 
+
+
+    def _PSI(self, reference_feature, test_feature):
+        eps = 1e-4
+        # Compare the bins to calculate PSI
+        psi_df = reference_feature.join(test_feature, on = "bin", how = "inner")
+    
+        # Add a small value for when the percent is zero
+        psi_df['percent_initial'] = psi_df['percent_initial'].apply(lambda x: eps if x == 0 else x)
+        psi_df['percent_new'] = psi_df['percent_new'].apply(lambda x: eps if x == 0 else x)
+    
+        # Calculate the psi
+        psi_df['psi'] = (psi_df['percent_initial'] - psi_df['percent_new']) * np.log(psi_df['percent_initial'] / psi_df['percent_new'])
+    
+        # Return the mean of psi values
+        return np.mean(psi_df['psi'])
+

From 2c4a9e38673f8f6e3a2406582dd5498d30fcfed4 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 19 Sep 2023 15:06:48 -0400
Subject: [PATCH 02/34] Move file to a different location

---
 PSI Detector.py => menelaus/data_drift/PSI Detector.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename PSI Detector.py => menelaus/data_drift/PSI Detector.py (100%)

diff --git a/PSI Detector.py b/menelaus/data_drift/PSI Detector.py
similarity index 100%
rename from PSI Detector.py
rename to menelaus/data_drift/PSI Detector.py

From 360f552419a32ec43de92e0f22f9a5a8c8e24634 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 19 Sep 2023 15:19:52 -0400
Subject: [PATCH 03/34] Update PSI Detector.py

---
 menelaus/data_drift/PSI Detector.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/menelaus/data_drift/PSI Detector.py b/menelaus/data_drift/PSI Detector.py
index fd6ebac9..e8842e1c 100644
--- a/menelaus/data_drift/PSI Detector.py	
+++ b/menelaus/data_drift/PSI Detector.py	
@@ -1,10 +1,6 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-# In[57]:
+from detector import *
 
 
-from detector import *
 class PSI_Detector(BatchDetector):
     
     input_type = "batch"

From fd3cac56c6539fd7b5a74b5311942e32601d2b1e Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 19 Sep 2023 15:22:21 -0400
Subject: [PATCH 04/34] Update PSI Detector.py

---
 menelaus/data_drift/PSI Detector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/menelaus/data_drift/PSI Detector.py b/menelaus/data_drift/PSI Detector.py
index e8842e1c..27fac056 100644
--- a/menelaus/data_drift/PSI Detector.py	
+++ b/menelaus/data_drift/PSI Detector.py	
@@ -1,4 +1,4 @@
-from detector import *
+from menelaus.detector import BatchDetector
 
 
 class PSI_Detector(BatchDetector):

From b806132f0f99903c7e2022c04a381caf520e712c Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 19 Sep 2023 15:27:55 -0400
Subject: [PATCH 05/34] Update PSI Detector.py

---
 menelaus/data_drift/PSI Detector.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/menelaus/data_drift/PSI Detector.py b/menelaus/data_drift/PSI Detector.py
index 27fac056..7d638c8b 100644
--- a/menelaus/data_drift/PSI Detector.py	
+++ b/menelaus/data_drift/PSI Detector.py	
@@ -1,5 +1,6 @@
 from menelaus.detector import BatchDetector
-
+import pandas as pd
+import numpy as np
 
 class PSI_Detector(BatchDetector):
     

From 6e18b840e39294b64467a6d16c7246b6ecb384a3 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 19 Sep 2023 15:46:04 -0400
Subject: [PATCH 06/34] Update PSI Detector.py

---
 menelaus/data_drift/PSI Detector.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/menelaus/data_drift/PSI Detector.py b/menelaus/data_drift/PSI Detector.py
index 7d638c8b..b1a9d67a 100644
--- a/menelaus/data_drift/PSI Detector.py	
+++ b/menelaus/data_drift/PSI Detector.py	
@@ -1,6 +1,8 @@
 from menelaus.detector import BatchDetector
 import pandas as pd
 import numpy as np
+import copy
+
 
 class PSI_Detector(BatchDetector):
     

From 5c7de0ccf4c07cb1bb51922fcbe2ff1e8733e04b Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 19 Sep 2023 16:06:29 -0400
Subject: [PATCH 07/34] rename

---
 menelaus/data_drift/{PSI Detector.py => psi_detector.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename menelaus/data_drift/{PSI Detector.py => psi_detector.py} (100%)

diff --git a/menelaus/data_drift/PSI Detector.py b/menelaus/data_drift/psi_detector.py
similarity index 100%
rename from menelaus/data_drift/PSI Detector.py
rename to menelaus/data_drift/psi_detector.py

From 5c0039f671e9192d0dc006c58fdda3bfc2ea9601 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 19 Sep 2023 16:16:50 -0400
Subject: [PATCH 08/34] Reformatted code with black

---
 menelaus/data_drift/psi_detector.py | 135 ++++++++++++++--------------
 1 file changed, 70 insertions(+), 65 deletions(-)

diff --git a/menelaus/data_drift/psi_detector.py b/menelaus/data_drift/psi_detector.py
index b1a9d67a..a0fd1ec3 100644
--- a/menelaus/data_drift/psi_detector.py
+++ b/menelaus/data_drift/psi_detector.py
@@ -5,119 +5,124 @@
 
 
 class PSI_Detector(BatchDetector):
-    
     input_type = "batch"
 
     def __init__(
         self,
-        
     ):
-
         # This initializes batch detector's parent class
-        super().__init__() 
-        
-        # Initialize any parameters to be used in this algorithm
+        super().__init__()
 
+        # Initialize any parameters to be used in this algorithm
 
     def set_reference(self, X, y_true=None, y_pred=None):
-       
         # leave this, it uses the parent class to validate input
-        # and sets the self.reference variable to refer to the reference dataset 
-     
+        # and sets the self.reference variable to refer to the reference dataset
+
         X, _, _ = super()._validate_input(X, None, None)
-        X = pd.DataFrame(
-            X, columns=self._input_cols
-        )  
+        X = pd.DataFrame(X, columns=self._input_cols)
 
         # Initialize reference dataset
         self.reference = copy.deepcopy(X)
 
-        self.reset() 
-        
-        
-    def reset(self):
-
+        self.reset()
 
+    def reset(self):
         super().reset()
 
-        
     def update(self, X, by_feature=True, X_by_feature=None, y_true=None, y_pred=None):
-        
-        # this function will update the detector with the test batch 
+        # this function will update the detector with the test batch
         eps = 1e-4
-        # create a variable to store psi values for each feature 
+        # create a variable to store psi values for each feature
         feature_psi = []
         o = pd.DataFrame(
-            columns=["Column", 'Moderate population change', 'Significant population change',"PSI"])
+            columns=[
+                "Column",
+                "Moderate population change",
+                "Significant population change",
+                "PSI",
+            ]
+        )
         z = 0
-        # 1. iterate through each feature in reference and test data, identify minimum and maximum value 
+        # 1. iterate through each feature in reference and test data, identify minimum and maximum value
         for column in self.reference.columns:
             min_val = min(min(self.reference[column]), min(X[column]))
             max_val = max(max(self.reference[column]), max(X[column]))
-            
-             # 2. use _bin_data function to bucketize reference, append to reference buckets array
-            bins = self._bin_data(self.reference[column],min_val,max_val)
-            bins_initial = pd.cut(self.reference[column], bins = bins, labels = range(1,len(bins)))
-            df_initial = pd.DataFrame({'initial': self.reference[column], 'bin': bins_initial})
-            grp_initial = df_initial.groupby('bin').count()
-            grp_initial['percent_initial'] = grp_initial['initial'] / sum(grp_initial['initial'])
-             # 3. use _bin_data function to bucketize test, append to reference test array 
-            bins_new = pd.cut(X[column], bins = bins, labels = range(1,len(bins)))
-            df_new = pd.DataFrame({'new': X[column], 'bin': bins_new})
-            grp_new = df_new.groupby('bin').count()
-            grp_new['percent_new'] = grp_new['new'] / sum(grp_new['new'])
+
+            # 2. use _bin_data function to bucketize reference, append to reference buckets array
+            bins = self._bin_data(self.reference[column], min_val, max_val)
+            bins_initial = pd.cut(
+                self.reference[column], bins=bins, labels=range(1, len(bins))
+            )
+            df_initial = pd.DataFrame(
+                {"initial": self.reference[column], "bin": bins_initial}
+            )
+            grp_initial = df_initial.groupby("bin").count()
+            grp_initial["percent_initial"] = grp_initial["initial"] / sum(
+                grp_initial["initial"]
+            )
+            # 3. use _bin_data function to bucketize test, append to reference test array
+            bins_new = pd.cut(X[column], bins=bins, labels=range(1, len(bins)))
+            df_new = pd.DataFrame({"new": X[column], "bin": bins_new})
+            grp_new = df_new.groupby("bin").count()
+            grp_new["percent_new"] = grp_new["new"] / sum(grp_new["new"])
             # 4. Call PSI function to calculate PSI on test and reference bucket representation,
-            psi_value = self._PSI(grp_initial,grp_new)
-            feature_psi.append([column,psi_value])
-            # store PSI for each feature in feature_psi array 
+            psi_value = self._PSI(grp_initial, grp_new)
+            feature_psi.append([column, psi_value])
+            # store PSI for each feature in feature_psi array
             if psi_value >= 0.1 and psi_value <= 0.2:
-                o.loc[z] = [column,'Yes','No',psi_value]
+                o.loc[z] = [column, "Yes", "No", psi_value]
                 z += 1
             elif psi_value > 0.2:
-                o.loc[z] = [column,'No','Yes',psi_value]
+                o.loc[z] = [column, "No", "Yes", psi_value]
                 z += 1
         # 5. Aggregate PSI values to determine if dataset is drifting
-        if o.any()['Column'] == True:
-            self.drift_state == 'drift'
+        if o.any()["Column"] == True:
+            self.drift_state == "drift"
             return feature_psi, o
         else:
-            return 'no drift detected',feature_psi
+            return "no drift detected", feature_psi
         # If PSI indicates drift, set self.drift_state == 'drift'
-        
-        # Create a dictionary to store if each individual feature is drifting 
 
-        # Update self.reference dataset to refer to test data, X 
-        self.reference = X
+        # Create a dictionary to store if each individual feature is drifting
 
-        
+        # Update self.reference dataset to refer to test data, X
+        self.reference = X
 
     def _bin_data(self, feature, min, max):
         eps = 1e-4
         if len(feature.unique()) < 10:
-            bins = [min + (max - min)*(i)/len(feature.unique()) for i in range(len(feature.unique())+1)]
-            bins[0] = min - eps # Correct the lower boundary
-            bins[-1] = max + eps # Correct the higher boundary
+            bins = [
+                min + (max - min) * (i) / len(feature.unique())
+                for i in range(len(feature.unique()) + 1)
+            ]
+            bins[0] = min - eps  # Correct the lower boundary
+            bins[-1] = max + eps  # Correct the higher boundary
             return bins
         else:
-            bins = [min + (max - min)*(i)/10 for i in range(10+1)]
-            bins[0] = min - eps # Correct the lower boundary
-            bins[-1] = max + eps # Correct the higher boundary
+            bins = [min + (max - min) * (i) / 10 for i in range(10 + 1)]
+            bins[0] = min - eps  # Correct the lower boundary
+            bins[-1] = max + eps  # Correct the higher boundary
             return bins
-            # return an array containing the sample counts within each bucket 
-
+            # return an array containing the sample counts within each bucket
 
     def _PSI(self, reference_feature, test_feature):
         eps = 1e-4
         # Compare the bins to calculate PSI
-        psi_df = reference_feature.join(test_feature, on = "bin", how = "inner")
-    
+        psi_df = reference_feature.join(test_feature, on="bin", how="inner")
+
         # Add a small value for when the percent is zero
-        psi_df['percent_initial'] = psi_df['percent_initial'].apply(lambda x: eps if x == 0 else x)
-        psi_df['percent_new'] = psi_df['percent_new'].apply(lambda x: eps if x == 0 else x)
-    
+        psi_df["percent_initial"] = psi_df["percent_initial"].apply(
+            lambda x: eps if x == 0 else x
+        )
+        psi_df["percent_new"] = psi_df["percent_new"].apply(
+            lambda x: eps if x == 0 else x
+        )
+
         # Calculate the psi
-        psi_df['psi'] = (psi_df['percent_initial'] - psi_df['percent_new']) * np.log(psi_df['percent_initial'] / psi_df['percent_new'])
-    
-        # Return the mean of psi values
-        return np.mean(psi_df['psi'])
+        psi_df["psi"] = (psi_df["percent_initial"] - psi_df["percent_new"]) * np.log(
+            psi_df["percent_initial"] / psi_df["percent_new"]
+        )
 
+        # Return the mean of psi values
+        return np.mean(psi_df["psi"])

From 2b808ab1cb7e6f24339fe046246c09cf333a9888 Mon Sep 17 00:00:00 2001
From: Thomas Schill <33845624+tms-bananaquit@users.noreply.github.com>
Date: Fri, 17 Nov 2023 13:01:10 -0500
Subject: [PATCH 09/34] add PSI to data_drift.init, add skeleton for unit tests

---
 menelaus/data_drift/__init__.py                |  7 ++++---
 tests/menelaus/data_drift/test_psi_detector.py | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)
 create mode 100644 tests/menelaus/data_drift/test_psi_detector.py

diff --git a/menelaus/data_drift/__init__.py b/menelaus/data_drift/__init__.py
index c14608c8..67f121ad 100644
--- a/menelaus/data_drift/__init__.py
+++ b/menelaus/data_drift/__init__.py
@@ -13,9 +13,10 @@
 are applied and when the results are verified. Data drift detection is also
 applicable in unsupervised learning settings.
 """
+from menelaus.data_drift.cdbd import CDBD
 from menelaus.data_drift.hdddm import HDDDM
+from menelaus.data_drift.histogram_density_method import HistogramDensityMethod
 from menelaus.data_drift.kdq_tree import KdqTreeStreaming, KdqTreeBatch
-from menelaus.data_drift.pca_cd import PCACD
 from menelaus.data_drift.nndvi import NNDVI
-from menelaus.data_drift.cdbd import CDBD
-from menelaus.data_drift.histogram_density_method import HistogramDensityMethod
+from menelaus.data_drift.pca_cd import PCACD
+from menelaus.data_drift.psi_detector import PSI
diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
new file mode 100644
index 00000000..efe8c3b3
--- /dev/null
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -0,0 +1,16 @@
+import pytest
+import numpy as np
+from menelaus.data_drift.psi_detector import PSI
+
+def test_validation(...):
+    ...
+
+def test_set_reference(...):
+    ...
+
+def test_no_drift(...):
+    ...
+
+
+def test_drift(...):
+    ...
\ No newline at end of file

From 954157413479e8a37f72824b47f0ccc832eaef1e Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 21 Nov 2023 16:36:24 -0500
Subject: [PATCH 10/34] Update refs.bib

Adding PSI
---
 docs/source/refs.bib | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/docs/source/refs.bib b/docs/source/refs.bib
index 897cb234..2c28700d 100644
--- a/docs/source/refs.bib
+++ b/docs/source/refs.bib
@@ -183,4 +183,20 @@ @misc{souza2020
   year={2020},
   howpublished="\url{https://arxiv.org/abs/2005.00113}",
   note={Online; accessed 20-July-2022},
-}
\ No newline at end of file
+}
+
+@misc{Psi2022,
+  title={Is your ML model stable? Checking model stability and population drift with PSI and CSI},
+  author={Vinícius Trevisan},
+  year={2022},
+  howpublished="\url{https://towardsdatascience.com/checking-model-stability-and-population-shift-with-psi-and-csi-6d12af008783}",
+  note={Online; accessed 20-June-2023},
+}
+
+@misc{Psi2022,
+  title={Population Stability Index (PSI)},
+  author={Selva Prabhakaran},
+  year={2022},
+  howpublished="\url{https://www.machinelearningplus.com/deployment/population-stability-index-psi/}",
+  note={Online; accessed 20-June-2023},
+}

From 395d84003d5c0ee2d466b348ca5ec438189b68ba Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 21 Nov 2023 18:28:24 -0500
Subject: [PATCH 11/34] Update psi_detector.py

---
 menelaus/data_drift/psi_detector.py | 221 +++++++++++++++-------------
 1 file changed, 118 insertions(+), 103 deletions(-)

diff --git a/menelaus/data_drift/psi_detector.py b/menelaus/data_drift/psi_detector.py
index a0fd1ec3..9e84e6ce 100644
--- a/menelaus/data_drift/psi_detector.py
+++ b/menelaus/data_drift/psi_detector.py
@@ -1,128 +1,143 @@
 from menelaus.detector import BatchDetector
 import pandas as pd
 import numpy as np
-import copy
-
-
 class PSI_Detector(BatchDetector):
+    """
+    Parent class for PSI-based drift detector, it serves as a fundamental framework for batch data applications.
+    
+    The PSI (Population Stability Index) is employed for detecting distributional shifts between a reference population
+    and a comparison population. This detector assesses changes by calculating the PSI, which measures the distributional
+    change based on percentiles. The psi function in the detector compares the distributions of scores in reference and 
+    test populations and calculates the PSI values for different bins.
+    
+    In summary, the PSI drift detector provides a robust mechanism for monitoring and detecting distributional changes in 
+    populations, making it adaptable for various data settings and applications.
+    
+    Ref. :cite:t:`Psi2022`
+    """
     input_type = "batch"
 
     def __init__(
         self,
+        eps=1e-4,
+        threshold=0.1
     ):
-        # This initializes batch detector's parent class
-        super().__init__()
+        """
+        Args:
+            eps:The eps parameter in the function represents a small constant (1e-4) introduced to prevent division by zero 
+                when calculating percentages, ensuring numerical stability.
+            threshold: It represents the threshold for detecting drift, if the calculated PSI value for a feature exceeds 
+                this threshold, it indicates drift in that feature, and the drift_state is set to 'drift'. This threshold is a 
+                user-defined value, and when crossed, it signifies a significant distributional change between the reference 
+                and test populations.
+        """
+        super().__init__() 
+        self.eps = eps
+        self.threshold = threshold
 
-        # Initialize any parameters to be used in this algorithm
 
     def set_reference(self, X, y_true=None, y_pred=None):
-        # leave this, it uses the parent class to validate input
-        # and sets the self.reference variable to refer to the reference dataset
-
+        """
+        Set the detector's reference batch to an updated value; typically
+        used in ``update``.
+
+        Attributes:
+            X (numpy.array): updated reference batch
+            y_true (numpy.array): true labels, not used in NNDVI
+            y_pred (numpy.array): predicted labels, not used in NNDVI
+        """
         X, _, _ = super()._validate_input(X, None, None)
-        X = pd.DataFrame(X, columns=self._input_cols)
-
-        # Initialize reference dataset
-        self.reference = copy.deepcopy(X)
-
-        self.reset()
-
+        self.reference = X.reshape(len(X),)
+        
+        
     def reset(self):
+        """
+        Initialize relevant attributes to original values, to ensure information
+        only stored from samples_since_reset onwards. Intended for use
+        after ``drift_state == 'drift'``.
+        """
         super().reset()
 
-    def update(self, X, by_feature=True, X_by_feature=None, y_true=None, y_pred=None):
-        # this function will update the detector with the test batch
-        eps = 1e-4
-        # create a variable to store psi values for each feature
-        feature_psi = []
-        o = pd.DataFrame(
-            columns=[
-                "Column",
-                "Moderate population change",
-                "Significant population change",
-                "PSI",
-            ]
-        )
-        z = 0
-        # 1. iterate through each feature in reference and test data, identify minimum and maximum value
-        for column in self.reference.columns:
-            min_val = min(min(self.reference[column]), min(X[column]))
-            max_val = max(max(self.reference[column]), max(X[column]))
-
-            # 2. use _bin_data function to bucketize reference, append to reference buckets array
-            bins = self._bin_data(self.reference[column], min_val, max_val)
-            bins_initial = pd.cut(
-                self.reference[column], bins=bins, labels=range(1, len(bins))
-            )
-            df_initial = pd.DataFrame(
-                {"initial": self.reference[column], "bin": bins_initial}
-            )
-            grp_initial = df_initial.groupby("bin").count()
-            grp_initial["percent_initial"] = grp_initial["initial"] / sum(
-                grp_initial["initial"]
-            )
-            # 3. use _bin_data function to bucketize test, append to reference test array
-            bins_new = pd.cut(X[column], bins=bins, labels=range(1, len(bins)))
-            df_new = pd.DataFrame({"new": X[column], "bin": bins_new})
-            grp_new = df_new.groupby("bin").count()
-            grp_new["percent_new"] = grp_new["new"] / sum(grp_new["new"])
-            # 4. Call PSI function to calculate PSI on test and reference bucket representation,
-            psi_value = self._PSI(grp_initial, grp_new)
-            feature_psi.append([column, psi_value])
-            # store PSI for each feature in feature_psi array
-            if psi_value >= 0.1 and psi_value <= 0.2:
-                o.loc[z] = [column, "Yes", "No", psi_value]
-                z += 1
-            elif psi_value > 0.2:
-                o.loc[z] = [column, "No", "Yes", psi_value]
-                z += 1
-        # 5. Aggregate PSI values to determine if dataset is drifting
-        if o.any()["Column"] == True:
-            self.drift_state == "drift"
-            return feature_psi, o
-        else:
-            return "no drift detected", feature_psi
-        # If PSI indicates drift, set self.drift_state == 'drift'
+        
+    def update(self, X: np.array, y_true=None, y_pred=None):
+        """
+        Update the detector with a new test batch. If drift is detected, new
+        reference batch becomes most recent test batch.
 
-        # Create a dictionary to store if each individual feature is drifting
+        Args:
+          X (numpy.array): next batch of data to detect drift on.
+          y_true (numpy.array): true labels, not used in PSI
+          y_pred (numpy.array): predicted labels, not used in PSI
+        """ 
+        if self._drift_state == "drift":
+            self.reset()
 
-        # Update self.reference dataset to refer to test data, X
-        self.reference = X
+        X, _, _ = super()._validate_input(X, None, None)
+
+        super().update(X=X, y_true=None, y_pred=None)
+        test_batch = (np.array(X)).reshape(len(X),)
+        min_val = min(min(self.reference), min(test_batch))
+        max_val = max(max(self.reference), max(test_batch))
+        bins = self._bin_data(self.reference,min_val,max_val)
+        bins_initial = pd.cut(self.reference, bins = bins, labels = range(1,len(bins)))
+        df_initial = pd.DataFrame({'initial': self.reference, 'bin': bins_initial})
+        grp_initial = df_initial.groupby('bin').count()
+        grp_initial['percent_initial'] = grp_initial['initial'] / sum(grp_initial['initial'])
+        bins_new = pd.cut(test_batch, bins = bins, labels = range(1,len(bins)))
+        df_new = pd.DataFrame({'new': test_batch, 'bin': bins_new})
+        grp_new = df_new.groupby('bin').count()
+        grp_new['percent_new'] = grp_new['new'] / sum(grp_new['new'])
+        psi_value = self._PSI(grp_initial,grp_new)
+        self.PSI_value = psi_value
+        if psi_value >= self.threshold:
+            self._drift_state = "drift"
+            self.set_reference(test_batch)
 
     def _bin_data(self, feature, min, max):
-        eps = 1e-4
-        if len(feature.unique()) < 10:
-            bins = [
-                min + (max - min) * (i) / len(feature.unique())
-                for i in range(len(feature.unique()) + 1)
-            ]
-            bins[0] = min - eps  # Correct the lower boundary
-            bins[-1] = max + eps  # Correct the higher boundary
+        """
+        Bin the given feature based on the specified minimum and maximum values.
+
+        Args:
+            feature (numpy.array): The feature to be binned.
+            min (float): The minimum value for binning.
+            max (float): The maximum value for binning.
+
+        Returns:
+            list: A list of bin edges for the given feature.
+        """
+        if len(np.unique(feature)) < 10:
+            bins = [min + (max - min)*(i)/len(np.unique(feature)) for i in range(len(np.unique(feature))+1)]
+            bins[0] = min - self.eps
+            bins[-1] = max + self.eps
             return bins
         else:
-            bins = [min + (max - min) * (i) / 10 for i in range(10 + 1)]
-            bins[0] = min - eps  # Correct the lower boundary
-            bins[-1] = max + eps  # Correct the higher boundary
+            bins = [min + (max - min)*(i)/10 for i in range(10+1)]
+            bins[0] = min - self.eps
+            bins[-1] = max + self.eps
             return bins
-            # return an array containing the sample counts within each bucket
+
 
     def _PSI(self, reference_feature, test_feature):
-        eps = 1e-4
-        # Compare the bins to calculate PSI
-        psi_df = reference_feature.join(test_feature, on="bin", how="inner")
-
-        # Add a small value for when the percent is zero
-        psi_df["percent_initial"] = psi_df["percent_initial"].apply(
-            lambda x: eps if x == 0 else x
-        )
-        psi_df["percent_new"] = psi_df["percent_new"].apply(
-            lambda x: eps if x == 0 else x
-        )
-
-        # Calculate the psi
-        psi_df["psi"] = (psi_df["percent_initial"] - psi_df["percent_new"]) * np.log(
-            psi_df["percent_initial"] / psi_df["percent_new"]
-        )
-
-        # Return the mean of psi values
-        return np.mean(psi_df["psi"])
+        """
+        Calculate the Population Stability Index (PSI) between reference and test features.
+
+        Args:
+            reference_feature (pandas.DataFrame): Reference feature distribution.
+            test_feature (pandas.DataFrame): Test feature distribution.
+
+        Returns:
+            float: The calculated PSI value indicating distributional change.
+        """
+        psi_df = reference_feature.join(test_feature, on = "bin", how = "inner")
+        psi_df['percent_initial'] = psi_df['percent_initial'].apply(lambda x: self.eps if x == 0 else x)
+        psi_df['percent_new'] = psi_df['percent_new'].apply(lambda x: self.eps if x == 0 else x)
+        psi_df['psi'] = (psi_df['percent_initial'] - psi_df['percent_new']) * np.log(psi_df['percent_initial'] / psi_df['percent_new'])
+        return np.mean(psi_df['psi'])
+    def PSI_value(self):
+        """
+        Get the last calculated PSI value.
+
+        Returns:
+            float: The PSI value from the most recent update.
+        """
+        return self.PSI_value

From 6a131c9fc2a8ce8e181f863038bc80d463de70ed Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Tue, 21 Nov 2023 18:41:59 -0500
Subject: [PATCH 12/34] formating code with black

---
 menelaus/data_drift/psi_detector.py | 91 ++++++++++++++++-------------
 1 file changed, 51 insertions(+), 40 deletions(-)

diff --git a/menelaus/data_drift/psi_detector.py b/menelaus/data_drift/psi_detector.py
index 9e84e6ce..3abc9d76 100644
--- a/menelaus/data_drift/psi_detector.py
+++ b/menelaus/data_drift/psi_detector.py
@@ -1,41 +1,39 @@
 from menelaus.detector import BatchDetector
 import pandas as pd
 import numpy as np
+
+
 class PSI_Detector(BatchDetector):
     """
     Parent class for PSI-based drift detector, it serves as a fundamental framework for batch data applications.
-    
+
     The PSI (Population Stability Index) is employed for detecting distributional shifts between a reference population
     and a comparison population. This detector assesses changes by calculating the PSI, which measures the distributional
-    change based on percentiles. The psi function in the detector compares the distributions of scores in reference and 
+    change based on percentiles. The psi function in the detector compares the distributions of scores in reference and
     test populations and calculates the PSI values for different bins.
-    
-    In summary, the PSI drift detector provides a robust mechanism for monitoring and detecting distributional changes in 
+
+    In summary, the PSI drift detector provides a robust mechanism for monitoring and detecting distributional changes in
     populations, making it adaptable for various data settings and applications.
-    
+
     Ref. :cite:t:`Psi2022`
     """
+
     input_type = "batch"
 
-    def __init__(
-        self,
-        eps=1e-4,
-        threshold=0.1
-    ):
+    def __init__(self, eps=1e-4, threshold=0.1):
         """
         Args:
-            eps:The eps parameter in the function represents a small constant (1e-4) introduced to prevent division by zero 
+            eps:The eps parameter in the function represents a small constant (1e-4) introduced to prevent division by zero
                 when calculating percentages, ensuring numerical stability.
-            threshold: It represents the threshold for detecting drift, if the calculated PSI value for a feature exceeds 
-                this threshold, it indicates drift in that feature, and the drift_state is set to 'drift'. This threshold is a 
-                user-defined value, and when crossed, it signifies a significant distributional change between the reference 
+            threshold: It represents the threshold for detecting drift, if the calculated PSI value for a feature exceeds
+                this threshold, it indicates drift in that feature, and the drift_state is set to 'drift'. This threshold is a
+                user-defined value, and when crossed, it signifies a significant distributional change between the reference
                 and test populations.
         """
-        super().__init__() 
+        super().__init__()
         self.eps = eps
         self.threshold = threshold
 
-
     def set_reference(self, X, y_true=None, y_pred=None):
         """
         Set the detector's reference batch to an updated value; typically
@@ -47,9 +45,10 @@ def set_reference(self, X, y_true=None, y_pred=None):
             y_pred (numpy.array): predicted labels, not used in NNDVI
         """
         X, _, _ = super()._validate_input(X, None, None)
-        self.reference = X.reshape(len(X),)
-        
-        
+        self.reference = X.reshape(
+            len(X),
+        )
+
     def reset(self):
         """
         Initialize relevant attributes to original values, to ensure information
@@ -58,7 +57,6 @@ def reset(self):
         """
         super().reset()
 
-        
     def update(self, X: np.array, y_true=None, y_pred=None):
         """
         Update the detector with a new test batch. If drift is detected, new
@@ -68,26 +66,30 @@ def update(self, X: np.array, y_true=None, y_pred=None):
           X (numpy.array): next batch of data to detect drift on.
           y_true (numpy.array): true labels, not used in PSI
           y_pred (numpy.array): predicted labels, not used in PSI
-        """ 
+        """
         if self._drift_state == "drift":
             self.reset()
 
         X, _, _ = super()._validate_input(X, None, None)
 
         super().update(X=X, y_true=None, y_pred=None)
-        test_batch = (np.array(X)).reshape(len(X),)
+        test_batch = (np.array(X)).reshape(
+            len(X),
+        )
         min_val = min(min(self.reference), min(test_batch))
         max_val = max(max(self.reference), max(test_batch))
-        bins = self._bin_data(self.reference,min_val,max_val)
-        bins_initial = pd.cut(self.reference, bins = bins, labels = range(1,len(bins)))
-        df_initial = pd.DataFrame({'initial': self.reference, 'bin': bins_initial})
-        grp_initial = df_initial.groupby('bin').count()
-        grp_initial['percent_initial'] = grp_initial['initial'] / sum(grp_initial['initial'])
-        bins_new = pd.cut(test_batch, bins = bins, labels = range(1,len(bins)))
-        df_new = pd.DataFrame({'new': test_batch, 'bin': bins_new})
-        grp_new = df_new.groupby('bin').count()
-        grp_new['percent_new'] = grp_new['new'] / sum(grp_new['new'])
-        psi_value = self._PSI(grp_initial,grp_new)
+        bins = self._bin_data(self.reference, min_val, max_val)
+        bins_initial = pd.cut(self.reference, bins=bins, labels=range(1, len(bins)))
+        df_initial = pd.DataFrame({"initial": self.reference, "bin": bins_initial})
+        grp_initial = df_initial.groupby("bin").count()
+        grp_initial["percent_initial"] = grp_initial["initial"] / sum(
+            grp_initial["initial"]
+        )
+        bins_new = pd.cut(test_batch, bins=bins, labels=range(1, len(bins)))
+        df_new = pd.DataFrame({"new": test_batch, "bin": bins_new})
+        grp_new = df_new.groupby("bin").count()
+        grp_new["percent_new"] = grp_new["new"] / sum(grp_new["new"])
+        psi_value = self._PSI(grp_initial, grp_new)
         self.PSI_value = psi_value
         if psi_value >= self.threshold:
             self._drift_state = "drift"
@@ -106,17 +108,19 @@ def _bin_data(self, feature, min, max):
             list: A list of bin edges for the given feature.
         """
         if len(np.unique(feature)) < 10:
-            bins = [min + (max - min)*(i)/len(np.unique(feature)) for i in range(len(np.unique(feature))+1)]
+            bins = [
+                min + (max - min) * (i) / len(np.unique(feature))
+                for i in range(len(np.unique(feature)) + 1)
+            ]
             bins[0] = min - self.eps
             bins[-1] = max + self.eps
             return bins
         else:
-            bins = [min + (max - min)*(i)/10 for i in range(10+1)]
+            bins = [min + (max - min) * (i) / 10 for i in range(10 + 1)]
             bins[0] = min - self.eps
             bins[-1] = max + self.eps
             return bins
 
-
     def _PSI(self, reference_feature, test_feature):
         """
         Calculate the Population Stability Index (PSI) between reference and test features.
@@ -128,11 +132,18 @@ def _PSI(self, reference_feature, test_feature):
         Returns:
             float: The calculated PSI value indicating distributional change.
         """
-        psi_df = reference_feature.join(test_feature, on = "bin", how = "inner")
-        psi_df['percent_initial'] = psi_df['percent_initial'].apply(lambda x: self.eps if x == 0 else x)
-        psi_df['percent_new'] = psi_df['percent_new'].apply(lambda x: self.eps if x == 0 else x)
-        psi_df['psi'] = (psi_df['percent_initial'] - psi_df['percent_new']) * np.log(psi_df['percent_initial'] / psi_df['percent_new'])
-        return np.mean(psi_df['psi'])
+        psi_df = reference_feature.join(test_feature, on="bin", how="inner")
+        psi_df["percent_initial"] = psi_df["percent_initial"].apply(
+            lambda x: self.eps if x == 0 else x
+        )
+        psi_df["percent_new"] = psi_df["percent_new"].apply(
+            lambda x: self.eps if x == 0 else x
+        )
+        psi_df["psi"] = (psi_df["percent_initial"] - psi_df["percent_new"]) * np.log(
+            psi_df["percent_initial"] / psi_df["percent_new"]
+        )
+        return np.mean(psi_df["psi"])
+
     def PSI_value(self):
         """
         Get the last calculated PSI value.

From 9fed735cfbd69a33909b06e98a25741d42d82e3c Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 14:32:11 -0500
Subject: [PATCH 13/34] Update test_psi_detector.py

---
 .../menelaus/data_drift/test_psi_detector.py  | 74 ++++++++++++++++---
 1 file changed, 65 insertions(+), 9 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index efe8c3b3..4f224203 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -1,16 +1,72 @@
 import pytest
 import numpy as np
-from menelaus.data_drift.psi_detector import PSI
+from menelaus.data_drift.psi_detector import PSI_Detector
 
-def test_validation(...):
-    ...
+def test_psi_init():
+    """Test correct default initialization for PSI"""
+    det = PSI_Detector()
+    assert det.eps == 1e-4
+    assert det.threshold == 0.1
+    assert det.batches_since_reset == 0
+    assert det.drift_state is None
+    
+def test_psi_set_reference():
+    """Assert PSI.set_reference works as intended"""
+    det = PSI_Detector()
+    ref = np.random.randint(0, 5, (3, 3))
+    det.set_reference(ref)
+    assert np.array_equal(ref, det.reference_batch)
+    
+def test_psi_update_1():
+    """Ensure PSI can update with small random batches"""
+    det = PSI_Detector()
+    det.set_reference(np.random.randint(0, 5, (10, 10)))
+    det.update(X=np.random.randint(0, 5, (10, 10)))
 
-def test_set_reference(...):
-    ...
+def test_psi_update_2():
+    """Ensure PSI can update with drift actions triggered"""
+    det = PSI_Detector()
+    # XXX - AS added this method of forcing drift in psi, which
+    #       is otherwise hard to induce drift in, for small data
+    #       examples. More stable alternatives may exist
+    np.random.seed(123)
+    det.set_reference(np.random.randint(0, 5, (10, 10)))
+    det.update(X=np.random.randint(10, 40, (10, 10)))
+    assert det.drift_state is not None
 
-def test_no_drift(...):
-    ...
+def test_psi_update_3():
+    """Check PSI.update behavior after drift alarm"""
+    det = PSI_Detector()
+    det.set_reference(np.random.randint(0, 5, (5, 5)))
+    det._drift_state = "drift"
+    det.update(X=np.random.randint(0, 5, (5, 5)))
+    assert det.drift_state is None
 
+def test_psi_update_4():
+    """Check failure when batch shapes don't match"""
+    det = PSI_Detector()
+    det.set_reference(np.random.randint(0, 5, (5, 6)))
+    with pytest.raises(ValueError):
+        det.update(np.random.randint(0, 5, (5, 5)))
 
-def test_drift(...):
-    ...
\ No newline at end of file
+def test_psi_reset():
+    """Check psi.reset works as intended"""
+    det = PSI_Detector()
+    det.batches_since_reset = 1
+    det.drift_state = "drift"
+    det.reset()
+    assert det.batches_since_reset == 0
+    assert det.drift_state is None
+
+    
+def test_psi_compute_PSI():
+    """Check psi._compute_threshold works correctly"""
+    det = PSI_Detector()
+    # XXX - Hardcoded known example added by AS, in the future a
+    #       dynamic way to test this function may be used
+    np.random.seed(123)
+    threshold = det._PSI(
+        v_ref=np.random.randint(0, 2, 5),
+        v_test=np.random.randint(0, 2, 5),
+    )
+    assert threshold >= 0 and threshold <= 1

From 36f96e2e848fab79cc494683e44e244f9f239bc3 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 14:36:10 -0500
Subject: [PATCH 14/34] Update psi_detector.py

---
 menelaus/data_drift/psi_detector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/menelaus/data_drift/psi_detector.py b/menelaus/data_drift/psi_detector.py
index 3abc9d76..3960343d 100644
--- a/menelaus/data_drift/psi_detector.py
+++ b/menelaus/data_drift/psi_detector.py
@@ -3,7 +3,7 @@
 import numpy as np
 
 
-class PSI_Detector(BatchDetector):
+class PSI(BatchDetector):
     """
     Parent class for PSI-based drift detector, it serves as a fundamental framework for batch data applications.
 

From 3db9642d24d5a17328a276f05ed5b233ac855314 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 14:37:12 -0500
Subject: [PATCH 15/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index 4f224203..7ac8fdc5 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -1,10 +1,10 @@
 import pytest
 import numpy as np
-from menelaus.data_drift.psi_detector import PSI_Detector
+from menelaus.data_drift.psi_detector import PSI
 
 def test_psi_init():
     """Test correct default initialization for PSI"""
-    det = PSI_Detector()
+    det = PSI()
     assert det.eps == 1e-4
     assert det.threshold == 0.1
     assert det.batches_since_reset == 0
@@ -12,20 +12,20 @@ def test_psi_init():
     
 def test_psi_set_reference():
     """Assert PSI.set_reference works as intended"""
-    det = PSI_Detector()
+    det = PSI()
     ref = np.random.randint(0, 5, (3, 3))
     det.set_reference(ref)
     assert np.array_equal(ref, det.reference_batch)
     
 def test_psi_update_1():
     """Ensure PSI can update with small random batches"""
-    det = PSI_Detector()
+    det = PSI()
     det.set_reference(np.random.randint(0, 5, (10, 10)))
     det.update(X=np.random.randint(0, 5, (10, 10)))
 
 def test_psi_update_2():
     """Ensure PSI can update with drift actions triggered"""
-    det = PSI_Detector()
+    det = PSI()
     # XXX - AS added this method of forcing drift in psi, which
     #       is otherwise hard to induce drift in, for small data
     #       examples. More stable alternatives may exist
@@ -36,7 +36,7 @@ def test_psi_update_2():
 
 def test_psi_update_3():
     """Check PSI.update behavior after drift alarm"""
-    det = PSI_Detector()
+    det = PSI()
     det.set_reference(np.random.randint(0, 5, (5, 5)))
     det._drift_state = "drift"
     det.update(X=np.random.randint(0, 5, (5, 5)))
@@ -44,14 +44,14 @@ def test_psi_update_3():
 
 def test_psi_update_4():
     """Check failure when batch shapes don't match"""
-    det = PSI_Detector()
+    det = PSI()
     det.set_reference(np.random.randint(0, 5, (5, 6)))
     with pytest.raises(ValueError):
         det.update(np.random.randint(0, 5, (5, 5)))
 
 def test_psi_reset():
     """Check psi.reset works as intended"""
-    det = PSI_Detector()
+    det = PSI()
     det.batches_since_reset = 1
     det.drift_state = "drift"
     det.reset()
@@ -61,7 +61,7 @@ def test_psi_reset():
     
 def test_psi_compute_PSI():
     """Check psi._compute_threshold works correctly"""
-    det = PSI_Detector()
+    det = PSI()
     # XXX - Hardcoded known example added by AS, in the future a
     #       dynamic way to test this function may be used
     np.random.seed(123)

From 46c72e79e88ff29bc78aa376accc2b1891c7ca09 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 14:48:08 -0500
Subject: [PATCH 16/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index 7ac8fdc5..1cb20caa 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -1,6 +1,6 @@
 import pytest
 import numpy as np
-from menelaus.data_drift.psi_detector import PSI
+from menelaus.data_drift import PSI
 
 def test_psi_init():
     """Test correct default initialization for PSI"""

From 4fbbe4c26fbce2dc595117eb35847708a79bf0f0 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 15:00:37 -0500
Subject: [PATCH 17/34] Update test_psi_detector.py

---
 .../menelaus/data_drift/test_psi_detector.py  | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index 1cb20caa..07af81e4 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -13,15 +13,15 @@ def test_psi_init():
 def test_psi_set_reference():
     """Assert PSI.set_reference works as intended"""
     det = PSI()
-    ref = np.random.randint(0, 5, (3, 3))
+    ref = np.random.randint(0, 5, (9, 1))
     det.set_reference(ref)
     assert np.array_equal(ref, det.reference_batch)
     
 def test_psi_update_1():
     """Ensure PSI can update with small random batches"""
     det = PSI()
-    det.set_reference(np.random.randint(0, 5, (10, 10)))
-    det.update(X=np.random.randint(0, 5, (10, 10)))
+    det.set_reference(np.random.randint(0, 5, (100, 1)))
+    det.update(X=np.random.randint(0, 5, (100, 1)))
 
 def test_psi_update_2():
     """Ensure PSI can update with drift actions triggered"""
@@ -30,24 +30,24 @@ def test_psi_update_2():
     #       is otherwise hard to induce drift in, for small data
     #       examples. More stable alternatives may exist
     np.random.seed(123)
-    det.set_reference(np.random.randint(0, 5, (10, 10)))
-    det.update(X=np.random.randint(10, 40, (10, 10)))
+    det.set_reference(np.random.randint(0, 5, (100, 1)))
+    det.update(X=np.random.randint(10, 40, (100, 1)))
     assert det.drift_state is not None
 
 def test_psi_update_3():
     """Check PSI.update behavior after drift alarm"""
     det = PSI()
-    det.set_reference(np.random.randint(0, 5, (5, 5)))
+    det.set_reference(np.random.randint(0, 5, (25, 1)))
     det._drift_state = "drift"
-    det.update(X=np.random.randint(0, 5, (5, 5)))
+    det.update(X=np.random.randint(0, 5, (25, 1)))
     assert det.drift_state is None
 
 def test_psi_update_4():
     """Check failure when batch shapes don't match"""
     det = PSI()
-    det.set_reference(np.random.randint(0, 5, (5, 6)))
+    det.set_reference(np.random.randint(0, 5, (30, 1)))
     with pytest.raises(ValueError):
-        det.update(np.random.randint(0, 5, (5, 5)))
+        det.update(np.random.randint(0, 5, (25, 1)))
 
 def test_psi_reset():
     """Check psi.reset works as intended"""
@@ -66,7 +66,7 @@ def test_psi_compute_PSI():
     #       dynamic way to test this function may be used
     np.random.seed(123)
     threshold = det._PSI(
-        v_ref=np.random.randint(0, 2, 5),
-        v_test=np.random.randint(0, 2, 5),
+        reference_feature=np.random.randint(0, 2, 5),
+        test_feature=np.random.randint(0, 2, 5),
     )
     assert threshold >= 0 and threshold <= 1

From 326687290895979cb691ba17086e1158962c9c18 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 15:13:33 -0500
Subject: [PATCH 18/34] Update test_psi_detector.py

---
 .../menelaus/data_drift/test_psi_detector.py  | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index 07af81e4..4d27ec79 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -1,5 +1,6 @@
 import pytest
 import numpy as np
+import pandas as pd
 from menelaus.data_drift import PSI
 
 def test_psi_init():
@@ -15,7 +16,7 @@ def test_psi_set_reference():
     det = PSI()
     ref = np.random.randint(0, 5, (9, 1))
     det.set_reference(ref)
-    assert np.array_equal(ref, det.reference_batch)
+    assert np.array_equal(ref, det.reference)
     
 def test_psi_update_1():
     """Ensure PSI can update with small random batches"""
@@ -42,13 +43,6 @@ def test_psi_update_3():
     det.update(X=np.random.randint(0, 5, (25, 1)))
     assert det.drift_state is None
 
-def test_psi_update_4():
-    """Check failure when batch shapes don't match"""
-    det = PSI()
-    det.set_reference(np.random.randint(0, 5, (30, 1)))
-    with pytest.raises(ValueError):
-        det.update(np.random.randint(0, 5, (25, 1)))
-
 def test_psi_reset():
     """Check psi.reset works as intended"""
     det = PSI()
@@ -62,11 +56,8 @@ def test_psi_reset():
 def test_psi_compute_PSI():
     """Check psi._compute_threshold works correctly"""
     det = PSI()
-    # XXX - Hardcoded known example added by AS, in the future a
-    #       dynamic way to test this function may be used
     np.random.seed(123)
-    threshold = det._PSI(
-        reference_feature=np.random.randint(0, 2, 5),
-        test_feature=np.random.randint(0, 2, 5),
-    )
+    PSI.set_reference(np.random.randint(0,100,100))
+    PSI.update(np.random.randint(0,100,100))
+    threshold = PSI.PSI_value
     assert threshold >= 0 and threshold <= 1

From 4d4d532413e5c70f62522f7dbac2fee288e82d75 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 15:19:03 -0500
Subject: [PATCH 19/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index 4d27ec79..7f1cd949 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -57,7 +57,9 @@ def test_psi_compute_PSI():
     """Check psi._compute_threshold works correctly"""
     det = PSI()
     np.random.seed(123)
-    PSI.set_reference(np.random.randint(0,100,100))
-    PSI.update(np.random.randint(0,100,100))
+    ref = np.random.randint(0,100,100)
+    test = np.random.randint(0,100,100)
+    PSI.set_reference(ref)
+    PSI.update(test)
     threshold = PSI.PSI_value
     assert threshold >= 0 and threshold <= 1

From 6bb398dd97fe56020ade73e610c741fe85aaccfb Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 15:23:44 -0500
Subject: [PATCH 20/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index 7f1cd949..a077dfe8 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -14,7 +14,7 @@ def test_psi_init():
 def test_psi_set_reference():
     """Assert PSI.set_reference works as intended"""
     det = PSI()
-    ref = np.random.randint(0, 5, (9, 1))
+    ref = np.random.randint(0, 5, 9)
     det.set_reference(ref)
     assert np.array_equal(ref, det.reference)
     
@@ -56,10 +56,9 @@ def test_psi_reset():
 def test_psi_compute_PSI():
     """Check psi._compute_threshold works correctly"""
     det = PSI()
-    np.random.seed(123)
     ref = np.random.randint(0,100,100)
     test = np.random.randint(0,100,100)
-    PSI.set_reference(ref)
-    PSI.update(test)
+    PSI.set_reference(X = ref)
+    PSI.update(X = test)
     threshold = PSI.PSI_value
     assert threshold >= 0 and threshold <= 1

From 38d15e479bce5e01038744af7600f55eaf625cb7 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 15:38:22 -0500
Subject: [PATCH 21/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index a077dfe8..fb983d44 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -14,9 +14,9 @@ def test_psi_init():
 def test_psi_set_reference():
     """Assert PSI.set_reference works as intended"""
     det = PSI()
-    ref = np.random.randint(0, 5, 9)
+    ref = np.random.randint(0, 5, (100,1))
     det.set_reference(ref)
-    assert np.array_equal(ref, det.reference)
+    assert (det.reference).ndim == 1
     
 def test_psi_update_1():
     """Ensure PSI can update with small random batches"""
@@ -58,7 +58,7 @@ def test_psi_compute_PSI():
     det = PSI()
     ref = np.random.randint(0,100,100)
     test = np.random.randint(0,100,100)
-    PSI.set_reference(X = ref)
-    PSI.update(X = test)
-    threshold = PSI.PSI_value
+    det.set_reference(X = ref)
+    det.update(X = test)
+    threshold = det.PSI_value
     assert threshold >= 0 and threshold <= 1

From 67dd5aecd4d9956aa2ebec2c31a74a255a2cb106 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 15:55:00 -0500
Subject: [PATCH 22/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index fb983d44..a2e1ed75 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -27,9 +27,6 @@ def test_psi_update_1():
 def test_psi_update_2():
     """Ensure PSI can update with drift actions triggered"""
     det = PSI()
-    # XXX - AS added this method of forcing drift in psi, which
-    #       is otherwise hard to induce drift in, for small data
-    #       examples. More stable alternatives may exist
     np.random.seed(123)
     det.set_reference(np.random.randint(0, 5, (100, 1)))
     det.update(X=np.random.randint(10, 40, (100, 1)))

From 4e1cd4483b0ea4b6da3342251abfb6dbfc867319 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 16:02:59 -0500
Subject: [PATCH 23/34] Update psi_detector.py

---
 menelaus/data_drift/psi_detector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/menelaus/data_drift/psi_detector.py b/menelaus/data_drift/psi_detector.py
index 3960343d..491fbdd8 100644
--- a/menelaus/data_drift/psi_detector.py
+++ b/menelaus/data_drift/psi_detector.py
@@ -151,4 +151,4 @@ def PSI_value(self):
         Returns:
             float: The PSI value from the most recent update.
         """
-        return self.PSI_value
+        return self.PSI_value
\ No newline at end of file

From cc85fac0dd7ea81e28e6a05499938ee7e3a8d44a Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 16:10:19 -0500
Subject: [PATCH 24/34] Update psi_detector.py

---
 menelaus/data_drift/psi_detector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/menelaus/data_drift/psi_detector.py b/menelaus/data_drift/psi_detector.py
index 491fbdd8..3960343d 100644
--- a/menelaus/data_drift/psi_detector.py
+++ b/menelaus/data_drift/psi_detector.py
@@ -151,4 +151,4 @@ def PSI_value(self):
         Returns:
             float: The PSI value from the most recent update.
         """
-        return self.PSI_value
\ No newline at end of file
+        return self.PSI_value

From 5d9f06fbd6c417137f636837e593a9ffb94232fc Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 16:18:28 -0500
Subject: [PATCH 25/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index a2e1ed75..631da45e 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -57,5 +57,4 @@ def test_psi_compute_PSI():
     test = np.random.randint(0,100,100)
     det.set_reference(X = ref)
     det.update(X = test)
-    threshold = det.PSI_value
-    assert threshold >= 0 and threshold <= 1
+    assert det.PSI_value >= 0 and det.PSI_value <= 1

From 9e91dcc7b1b6a288d6f1edc66ab1c1c7b0f21f12 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 16:27:40 -0500
Subject: [PATCH 26/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index 631da45e..bfeffb7f 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -57,4 +57,9 @@ def test_psi_compute_PSI():
     test = np.random.randint(0,100,100)
     det.set_reference(X = ref)
     det.update(X = test)
+    result = det.PSI_value
+    det = PSI()
+    det.set_reference(X = ref)
+    det.update(X = test)
+    assert det.PSI_value == result
     assert det.PSI_value >= 0 and det.PSI_value <= 1

From a46710cfd7aaaddf8ab13e24b1c7b77c2c454859 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 16:40:24 -0500
Subject: [PATCH 27/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index bfeffb7f..36b46bcf 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -61,5 +61,5 @@ def test_psi_compute_PSI():
     det = PSI()
     det.set_reference(X = ref)
     det.update(X = test)
-    assert det.PSI_value == result
-    assert det.PSI_value >= 0 and det.PSI_value <= 1
+    assertEqual(det.PSI_value == result)
+    assertTrue(det.PSI_value >= 0 and det.PSI_value <= 1)

From dab9abf18482ffa00d42c139144f0d4753865282 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 16:46:42 -0500
Subject: [PATCH 28/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index 36b46bcf..bfeffb7f 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -61,5 +61,5 @@ def test_psi_compute_PSI():
     det = PSI()
     det.set_reference(X = ref)
     det.update(X = test)
-    assertEqual(det.PSI_value == result)
-    assertTrue(det.PSI_value >= 0 and det.PSI_value <= 1)
+    assert det.PSI_value == result
+    assert det.PSI_value >= 0 and det.PSI_value <= 1

From 2e9656acfc9876b90cb0d2d8316a368866ac824a Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 16:48:46 -0500
Subject: [PATCH 29/34] Update psi_detector.py

---
 menelaus/data_drift/psi_detector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/menelaus/data_drift/psi_detector.py b/menelaus/data_drift/psi_detector.py
index 3960343d..ce335846 100644
--- a/menelaus/data_drift/psi_detector.py
+++ b/menelaus/data_drift/psi_detector.py
@@ -90,7 +90,7 @@ def update(self, X: np.array, y_true=None, y_pred=None):
         grp_new = df_new.groupby("bin").count()
         grp_new["percent_new"] = grp_new["new"] / sum(grp_new["new"])
         psi_value = self._PSI(grp_initial, grp_new)
-        self.PSI_value = psi_value
+        self._PSI_value = psi_value
         if psi_value >= self.threshold:
             self._drift_state = "drift"
             self.set_reference(test_batch)
@@ -144,7 +144,7 @@ def _PSI(self, reference_feature, test_feature):
         )
         return np.mean(psi_df["psi"])
 
-    def PSI_value(self):
+    def _PSI_value(self):
         """
         Get the last calculated PSI value.
 

From 4d8cbc21e8cbd942f6fe5037b5f9a23fb3a68741 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 16:51:57 -0500
Subject: [PATCH 30/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index bfeffb7f..2f2eb480 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -57,9 +57,9 @@ def test_psi_compute_PSI():
     test = np.random.randint(0,100,100)
     det.set_reference(X = ref)
     det.update(X = test)
-    result = det.PSI_value
+    result = det._PSI_value
     det = PSI()
     det.set_reference(X = ref)
     det.update(X = test)
-    assert det.PSI_value == result
-    assert det.PSI_value >= 0 and det.PSI_value <= 1
+    assert det._PSI_value == result
+    assert det._PSI_value >= 0 and det._PSI_value <= 1

From 1530e7fbf96565b5dd39b3fb537daaccd99597a7 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 17:08:22 -0500
Subject: [PATCH 31/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index 2f2eb480..f88a8d14 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -48,18 +48,3 @@ def test_psi_reset():
     det.reset()
     assert det.batches_since_reset == 0
     assert det.drift_state is None
-
-    
-def test_psi_compute_PSI():
-    """Check psi._compute_threshold works correctly"""
-    det = PSI()
-    ref = np.random.randint(0,100,100)
-    test = np.random.randint(0,100,100)
-    det.set_reference(X = ref)
-    det.update(X = test)
-    result = det._PSI_value
-    det = PSI()
-    det.set_reference(X = ref)
-    det.update(X = test)
-    assert det._PSI_value == result
-    assert det._PSI_value >= 0 and det._PSI_value <= 1

From 9bf792ab898c281911a2a7ded5af5d820d849246 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 17:09:22 -0500
Subject: [PATCH 32/34] Update psi_detector.py

---
 menelaus/data_drift/psi_detector.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/menelaus/data_drift/psi_detector.py b/menelaus/data_drift/psi_detector.py
index ce335846..9ac653dd 100644
--- a/menelaus/data_drift/psi_detector.py
+++ b/menelaus/data_drift/psi_detector.py
@@ -90,11 +90,10 @@ def update(self, X: np.array, y_true=None, y_pred=None):
         grp_new = df_new.groupby("bin").count()
         grp_new["percent_new"] = grp_new["new"] / sum(grp_new["new"])
         psi_value = self._PSI(grp_initial, grp_new)
-        self._PSI_value = psi_value
         if psi_value >= self.threshold:
             self._drift_state = "drift"
             self.set_reference(test_batch)
-
+        return psi_value
     def _bin_data(self, feature, min, max):
         """
         Bin the given feature based on the specified minimum and maximum values.
@@ -143,12 +142,3 @@ def _PSI(self, reference_feature, test_feature):
             psi_df["percent_initial"] / psi_df["percent_new"]
         )
         return np.mean(psi_df["psi"])
-
-    def _PSI_value(self):
-        """
-        Get the last calculated PSI value.
-
-        Returns:
-            float: The PSI value from the most recent update.
-        """
-        return self.PSI_value

From b3a724e2bdde3067d4aff6182b025b7a5be7c645 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 17:16:31 -0500
Subject: [PATCH 33/34] Update test_psi_detector.py

---
 tests/menelaus/data_drift/test_psi_detector.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/menelaus/data_drift/test_psi_detector.py b/tests/menelaus/data_drift/test_psi_detector.py
index f88a8d14..ff5343ce 100644
--- a/tests/menelaus/data_drift/test_psi_detector.py
+++ b/tests/menelaus/data_drift/test_psi_detector.py
@@ -21,15 +21,15 @@ def test_psi_set_reference():
 def test_psi_update_1():
     """Ensure PSI can update with small random batches"""
     det = PSI()
-    det.set_reference(np.random.randint(0, 5, (100, 1)))
-    det.update(X=np.random.randint(0, 5, (100, 1)))
+    det.set_reference(np.random.randint(0, 5, (10, 1)))
+    det.update(X=np.random.randint(0, 5, (10, 1)))
 
 def test_psi_update_2():
     """Ensure PSI can update with drift actions triggered"""
     det = PSI()
     np.random.seed(123)
-    det.set_reference(np.random.randint(0, 5, (100, 1)))
-    det.update(X=np.random.randint(10, 40, (100, 1)))
+    det.set_reference(np.random.randint(0, 100, (200, 1)))
+    det.update(X=np.random.randint(150, 200, (100, 1)))
     assert det.drift_state is not None
 
 def test_psi_update_3():

From 614eb0fbab1babf92ac1ad8d0ca1b66142858d56 Mon Sep 17 00:00:00 2001
From: Tim Chen <115333718+951378644@users.noreply.github.com>
Date: Wed, 22 Nov 2023 17:19:55 -0500
Subject: [PATCH 34/34] update

---
 menelaus/data_drift/psi_detector.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/menelaus/data_drift/psi_detector.py b/menelaus/data_drift/psi_detector.py
index 9ac653dd..efad3a82 100644
--- a/menelaus/data_drift/psi_detector.py
+++ b/menelaus/data_drift/psi_detector.py
@@ -94,6 +94,7 @@ def update(self, X: np.array, y_true=None, y_pred=None):
             self._drift_state = "drift"
             self.set_reference(test_batch)
         return psi_value
+
     def _bin_data(self, feature, min, max):
         """
         Bin the given feature based on the specified minimum and maximum values.