updates documentation

BenjaminDoran · Mar 29, 2018 · bc02e79 · bc02e79
1 parent 53d4675
commit bc02e79
Showing 1 changed file with 38 additions and 26 deletions.
diff --git a/unidip/unidip.py b/unidip/unidip.py
@@ -12,45 +12,52 @@
 
 class UniDip:
     """ Class containing the UniDip clustering algorithm.
-        Isolates peaks in high noise samples. 
+        Isolates peaks in high noise 1d numeric samples. 
+
+        INPUT:
+            dat:     1d np.array of floats or ints (required)
+            is_hist: False, flips from looking at 
+                     x axis to density along x axis
+            alpha:   0.05, tuning parameter, sets significance
+                     level of p_values
+            ntrials: 100, number of trials when running diptest
+            mrg_dst: 1, distance to merge returned intervals
+            debug:   False, determines whether to plot 
+                     the data at each recursion level
+
+        Algorithm Ref:
+            title:  "Skinny-dip: Clustering in a sea of noise" 
+            author: Samuel Maurus & Claudia Plant.
+            url:    http://www.kdd.org/kdd2016/subtopic/view/skinny-dip-clustering-in-a-sea-of-noise
     """
-    def __init__(self, dat, is_hist=False, alpha=0.05, ntrials=100, merge_distance=1, debug=False):
+    def __init__(self, dat, is_hist=False, alpha=0.05, ntrials=100, mrg_dst=1, debug=False):
         self.dat = np.msort(np.array(dat)) if not is_hist else np.array(dat)
         self.is_hist = is_hist
         self.alpha = alpha
         self.ntrials = ntrials
-        self.merge_distance = merge_distance
+        self.mrg_dst = mrg_dst
         self.debug = debug
 
     def run(self):
         """ Perform unidip algorithm on 1d array
 
-            INPUT:
-            dat: 1d np.array of floats or ints
-            offset: int, offset from dat[0]
-            is_hist: bool, flips from looking at x axis to density along x axis
-            alpha: float, tuning parameter, sets significance level of p_values
-            _is_model: internal should not be changed
-            numt: int, number of trials in diptest
-            plotdat: none or dat, determines whether to plot the data at each recursion level
-
             RETURNS:
             list of tuples: each tuple containing the start and end indices on the x axis.
         """
         modidxs = self._unidip(0, len(self.dat), True, self.debug)
         return self.merge_intervals(modidxs)
 
     def merge_intervals(self, idxs):
-        """ merge intervals that are touching """
+        """ merge intervals that are at most mrg_dst apart """
         midxs = []
         for idx in sorted(idxs):
             if not midxs:
                 midxs.append(idx)
             else:
                 lower = midxs[-1]
-                # adjacent or overlapping (adjust MERGE_DISTANCE to merge intervals)
+                # adjacent or overlapping (adjust mrg_dst to merge intervals)
                 # that are close enough
-                if idx[0] - lower[1] <= self.merge_distance:
+                if idx[0] - lower[1] <= self.mrg_dst:
                     midxs[-1] = (lower[0], idx[1])
                 else:
                     midxs.append(idx)
@@ -59,7 +66,7 @@ def merge_intervals(self, idxs):
     def plot(self, sub, ints, plot_style="seaborn"):
         """ Plot complete data, highlight subset currently being searched,
             and add vertical lines for discovered intervals. (only intervals of
-            the current level appear.)
+            the current recursion level appear.)
         """
         import matplotlib.pyplot as plt
         plt.style.use(plot_style)
@@ -88,16 +95,17 @@ def _unidip(self, start, end, is_model, debug):
         """ Perform unidip algorithm on 1d array
 
             INPUT:
-            dat: 1d np.array of floats or ints
-            offset: int, offset from dat[0]
-            is_hist: bool, flips from looking at x axis to density along x axis
-            alpha: float, tuning parameter, sets significance level of p_values
-            _is_model: internal should not be changed
-            numt: int, number of trials in diptest
-            plotdat: none or dat, determines whether to plot the data at each recursion level
+                start:    0, idx of first point in current slice
+                end:      len(data), idx one past the last point
+                          in current slice (exclusive bound)
+                is_model: True, always starts as true
+                ntrials:  100, number of trials in diptest
+                debug:    False, determines whether to plot the 
+                          data at each recursion level
 
             RETURNS:
-            list of tuples: each tuple containing the start and end indecies on the x axis.
+                list of tuples: each tuple containing the start and
+                                end indices on the x axis.
         """
         dat = self.dat[start:end]
         interval_idxs = list()
@@ -168,8 +176,12 @@ def _get_full_interval(self, mod_int):
         return tuple(full_indxs)
 
     def _mirror_data(self, dat, left):
-        """ Mirror dataset.
-        input: [1, 2, 3] output: [-2, -1, 0, 1, 2]
+        """ Mirrors dataset
+
+            if is_hist (measuring density)
+                on input = [1, 2, 3], output = [3, 2, 1, 2, 3]
+            else (measuring raw sample)
+                on input = [1, 2, 3], output = [-2, -1, 0, 1, 2]
         """
         wdat = np.array(dat)
         if self.is_hist: