wgurecky · wgurecky · Aug 19, 2019 · Feb 3, 2020 · Feb 4, 2020 · Feb 4, 2020
diff --git a/requirements.txt b/requirements.txt
@@ -8,3 +8,4 @@ networkx>=2.2.0
 matplotlib
 seaborn
 statsmodels
+quadpy
diff --git a/setup.py b/setup.py
@@ -24,7 +24,8 @@ def setup_package():
           install_requires=['numpy>=1.8.0', 'scipy>=0.13',
                             'pandas>=0.13.0', 'h5py>=2.2.0',
                             'seaborn>=0.7.0', 'networkx>=2.0.0',
-                            'emcee>=2.0.0', 'statsmodels', 'numba'],
+                            'emcee>=2.0.0', 'statsmodels', 'numba',
+                            'quadpy'],
           package_data={'': ['*.txt']},
           license='BSD-3clause',
           author_email='[email protected]',

diff --git a/starvine/mvar/mv_plot.py b/starvine/mvar/mv_plot.py
@@ -21,7 +21,7 @@ def matrixPairPlot(data_in, weights=None, corr_stat="kendalltau", **kwargs):
     else:
         data = data_in
     upper_kde = kwargs.pop("kde", False)
-    pair_plot = sns.PairGrid(data, palette=["red"], size=kwargs.pop("size", 5))
+    pair_plot = sns.PairGrid(data, palette=["red"], height=kwargs.pop("size", 5))
     # UPPER
     if upper_kde:
         pair_plot.map_upper(sns.kdeplot, cmap="Blues_d")

diff --git a/starvine/vine/C_vine.py b/starvine/vine/C_vine.py
@@ -10,11 +10,29 @@
 from pandas import DataFrame
 from scipy.optimize import minimize
 import networkx as nx
+from itertools import product
 from starvine.bvcopula import pc_base as pc
+import scipy.integrate as spi
 import numpy as np
 from six import iteritems
 from starvine.vine.tree import Vtree
 
+def splitcubes(K, d, bounds_list):
+    coords = [np.linspace(bounds_list[i][0] , bounds_list[i][1], num=K + 1) for i in range(d)]
+    grid = np.stack(np.meshgrid(*coords)).T
+
+    ks = list(range(1, K))
+    for slices in product(*([[slice(b,e) for b,e in zip([None] + ks, [k+1 for k in ks] + [None])]]*d)):
+        yield grid[slices]
+
+def cubesets(K, d, bounds_list=()):
+    assert len(bounds_list) == d
+    assert len(bounds_list[0]) == 2
+    if (K & (K - 1)) or K < 2:
+        raise ValueError('K must be a positive power of 2. K: %s' % K)
+
+    return list([set(tuple(p.tolist()) for p in c.reshape(-1, d)) for c in splitcubes(K, d, bounds_list)])
+
 
 class Cvine(BaseVine):
     """!
@@ -108,6 +126,160 @@ def buildDeepTrees(self, level=1):
         elif level == self.nLevels - 1:
             self.vine[level].evalH()
 
+    def _lnpdf(self, x, **kwargs):
+        vine_cond_x = []
+        vine_cond_x.append(self.vine[0].evalHData(x))
+        for lvl in range(1, self.nLevels):
+            vine_cond_x.append(self.vine[lvl].evalHData(vine_cond_x[lvl - 1]))
+        """
+        sum_pdf = np.zeros(len(x))
+        total_n_copula = np.math.factorial(self.nLevels)
+        for u_id, v_id in self.vine[0].tree.edges(data=False):
+            x_lvl = x
+
+            rootID = self.vine[0].rootNodeID
+            if u_id is not rootID:
+                nonRootID = u_id
+            else:
+                nonRootID = v_id
+
+            u, v = np.asarray(x_lvl[nonRootID]), np.asarray(x_lvl[rootID])
+            # u, v = np.asarray(x_lvl[u_id]), np.asarray(x_lvl[v_id])
+
+            # tree_wgt = len(self.vine[0].tree.edges)
+            tree_wgt =  total_n_copula
+            sum_pdf += \
+                self.vine[0].tree.adj[nonRootID][rootID]["pc"].copulaModel.pdf( \
+                u, v) / tree_wgt
+
+        for lvl in range(1, self.nLevels):
+            for u_id, v_id in self.vine[lvl].tree.edges(data=False):
+                x_lvl = vine_cond_x[lvl - 1]
+
+                rootID = self.vine[lvl].rootNodeID
+                if u_id is not rootID:
+                    nonRootID = u_id
+                else:
+                    nonRootID = v_id
+
+                # u, v = np.asarray(x_lvl[u_id]), np.asarray(x_lvl[v_id])
+                u, v = np.asarray(x_lvl[nonRootID]), np.asarray(x_lvl[rootID])
+
+                # tree_wgt = len(self.vine[lvl].tree.edges)
+                tree_wgt =  total_n_copula
+                sum_pdf += \
+                    self.vine[lvl].tree.adj[nonRootID][rootID]["pc"].copulaModel.pdf( \
+                    u, v) / tree_wgt
+        return sum_pdf
+        """
+        ln_pdf = np.zeros(len(x))
+        total_n_copula = np.math.factorial(self.nLevels)
+        for u_id, v_id in self.vine[0].tree.edges(data=False):
+            x_lvl = x
+
+            rootID = self.vine[0].rootNodeID
+            if u_id is not rootID:
+                nonRootID = u_id
+            else:
+                nonRootID = v_id
+
+            # u, v = np.asarray(x_lvl[u_id]), np.asarray(x_lvl[v_id])
+            u, v = np.asarray(x_lvl[nonRootID]), np.asarray(x_lvl[rootID])
+
+            ln_pdf += \
+                np.log(self.vine[0].tree.adj[nonRootID][rootID]["pc"].copulaModel.pdf( \
+                u, v))
+
+        for lvl in range(1, self.nLevels):
+            for u_id, v_id in self.vine[lvl].tree.edges(data=False):
+                x_lvl = vine_cond_x[lvl - 1]
+
+                rootID = self.vine[lvl].rootNodeID
+                if u_id is not rootID:
+                    nonRootID = u_id
+                else:
+                    nonRootID = v_id
+
+                # u, v = np.asarray(x_lvl[u_id]), np.asarray(x_lvl[v_id])
+                u, v = np.asarray(x_lvl[nonRootID]), np.asarray(x_lvl[rootID])
+
+                ln_pdf += \
+                    np.log(self.vine[lvl].tree.adj[nonRootID][rootID]["pc"].copulaModel.pdf( \
+                    u, v))
+        return ln_pdf
+
+    def _pdf(self, x, **kwargs):
+        assert isinstance(x, DataFrame)
+        return np.exp(self._lnpdf(x, **kwargs))
+
+    def _cdf(self, x, k_split=2, **kwargs):
+        """
+        @brief CDF via numerical integration of PDF.
+        @param x pandas dataframe of locations at which to evaluate CDF.
+        @param k_split splits the [0,1]^D dimensional hypercube
+            over which the copula PDF is supported over into k_split chunks
+            in each dimension.  Ex:  a copula of D=3 with k_split=2
+            would have 8 equally sized cubical integration zones.
+        """
+        # note: quadpy makes use of mypy
+        import quadpy
+        assert isinstance(x, DataFrame)
+        dim = len(x.columns)
+        scheme = quadpy.ncube.stroud_cn_5_9(dim)
+        cdf_vector = np.zeros(len(x))
+        points_vector = np.ones(len(x)) * 0.0
+        def _pdf(*x):
+            # x should always have even len
+            col_labels = x[int(len(x)/2):]
+            x_dframe = DataFrame()
+            for i, col_label in enumerate(col_labels):
+                x_dframe[col_label] = [x[i]]
+            pdf_x = self._pdf(x_dframe)
+            print(x[0:int(len(x)/2)], pdf_x)
+            return pdf_x
+
+        def f_pdf(x_np):
+            nps = x_np.shape
+            x_dframe = DataFrame()
+            for k in range(nps[0]):
+                x_dframe[x.columns[k]] = x_np[k, :]
+            pdf_x = self._pdf(x_dframe)
+            pdf_x[np.isnan(pdf_x)] = np.max(pdf_x)
+            return pdf_x
+
+        edge_tol = 1e-8
+        for i, x_i in x.iterrows():
+            ranges = np.zeros((len(x_i), 2)) + edge_tol
+            col_labels = []
+            j=0
+            for col_name, col_data  in x_i.iteritems():
+                col_labels.append(col_name)
+                ranges[j, 1] = np.clip(col_data, edge_tol, 1. - 0.1 * edge_tol)
+                j += 1
+
+            sub_cubes = cubesets(k_split, dim, ranges)
+            for k, sub_cube in enumerate(sub_cubes):
+                sub_cube_coords = np.asarray(list(sub_cube))
+                sub_cube_ranges = np.array([np.min(sub_cube_coords, axis=0), np.max(sub_cube_coords, axis=0)]).T
+                # print("Integrating subcube %d out of %d" % (int(k), len(sub_cubes)))
+                cdf_vector[i] += scheme.integrate(f_pdf,
+                        quadpy.ncube.ncube_points(
+                            *[r for r in sub_cube_ranges])
+                            )
+
+            # cdf_vector[i] = scheme.integrate(f_pdf,
+            #         quadpy.ncube.ncube_points(
+            #             *[r for r in ranges])
+            #             )
+
+            #res = spi.nquad(_pdf, ranges,
+            #               args=(col_labels),
+            #               opts={'limit': 2, 'epsabs': 1e-2, 'epsrel': 1e-2,
+            #                     'points': points_vector, 'limlst': 8})[0]
+            #print("Vine CDF abs err est at xi=%s: %0.5e" % (str(x_i), res[1]))
+            #cdf_vector[i] = res[0]
+        return cdf_vector
+
 
 class Ctree(Vtree):
     """!
@@ -222,16 +394,25 @@ def _evalH(self):
             rootID = self.rootNodeID
             if u is not rootID:
                 nonRootID = u
-                nonRootData = data["pc"].UU
-                rootData = data["pc"].VV
             else:
                 nonRootID = v
-                nonRootData = data["pc"].VV
-                rootData = data["pc"].UU
             condData[(nonRootID, rootID)] = data["h-dist"](data["pc"].VV,
                                                            data["pc"].UU)
         return condData
 
+    def evalHData(self, x):
+        assert isinstance(x, DataFrame)
+        condData = DataFrame()
+        for u, v, data in self.tree.edges(data=True):
+            rootID = self.rootNodeID
+            if u is not rootID:
+                nonRootID = u
+            else:
+                nonRootID = v
+            condData[(nonRootID, rootID)] = data["h-dist"](x[v],
+                                                           x[u])
+        return condData
+
     def _getEdgeCopulaParams(self, u, v):
         """!
         @brief Get copula paramters of particular edge in tree.

diff --git a/starvine/vine/base_vine.py b/starvine/vine/base_vine.py
@@ -25,6 +25,7 @@ def _validate_trial_copula(self, trial_copula):
         for key, val in iteritems(trial_copula):
             assert key in self._all_trial_copula
             assert self._all_trial_copula[key] == val
+        return trial_copula
 
     @property
     def _all_trial_copula(self):
@@ -51,6 +52,18 @@ def loadVineStructure(self, vS):
         """
         pass
 
+    def vinePdf(self, x, level=1, **kwargs):
+        return self._pdf(x)
+
+    def vineCdf(self, x):
+        return self._cdf(x)
+
+    def _pdf(self, x, level=1, **kwargs):
+        raise NotImplementedError
+
+    def _cdf(self, x, level=1, **kwargs):
+        raise NotImplementedError
+
     def vineNLLH(self, vineParams=[None], **kwargs):
         """!
         @brief Compute the vine negative log likelihood.  Used for

diff --git a/starvine/vine/tree.py b/starvine/vine/tree.py
@@ -201,7 +201,8 @@ def unrollNodes(l):
             tree_num = 0
         else:
             # Determine tree level from number of nodes (#nodes are powers of 2)
-            tree_num = int(np.log(len(unrollNodes(n0))) / np.log(2)) - 1
+            # tree_num = int(np.log(len(unrollNodes(n0))) / np.log(2)) - 1
+            tree_num = len(np.unique(np.asarray(n0).flatten())) - 1
         current_tree = vine[tree_num].tree
         next_tree = vine[tree_num + 1].tree
         edge_info = current_tree[n0][n1]

diff --git a/tests/test_C_vine_construct.py b/tests/test_C_vine_construct.py
@@ -8,6 +8,7 @@
 from scipy.stats import norm, beta
 import unittest
 import os
+from scipy.optimize import minimize
 import numpy as np
 import pandas as pd
 pwd_ = os.path.dirname(os.path.abspath(__file__))
@@ -71,10 +72,14 @@ def testCvineConstruct(self):
         self.assertTrue(np.allclose(tst_rho_matrix - sample_rho_matrix, 0, atol=0.10))
         self.assertTrue(np.allclose(tst_ktau_matrix - sample_ktau_matrix, 0, atol=0.10))
 
+        def opti_wrap(fun, x0, args, disp=0, **kwargs):
+            return minimize(fun, x0, args=args, method='Nelder-Mead',
+                                        tol=1e-12, options={'maxiter': 4000}).x
+
         # fit marginal distributions to original data
         marginal_dict = {}
         for col_name in tstData.columns:
-            marginal_dict[col_name] = beta(*beta.fit(tstData[col_name]))
+            marginal_dict[col_name] = beta(*beta.fit(tstData[col_name], optimizer=opti_wrap))
         # scale the samples
         c_vine_scaled_samples_a = tstVine.scaleSamples(c_vine_samples, marginal_dict)
         matrixPairPlot(c_vine_scaled_samples_a, savefig="vine_varaite_resampled_scaled_a.png")
@@ -89,6 +94,28 @@ def testCvineConstruct(self):
         self.assertTrue(np.allclose(tst_rho_matrix - sample_scaled_rho_matrix_a, 0, atol=0.1))
         self.assertTrue(np.allclose(tst_rho_matrix - sample_scaled_rho_matrix_b, 0, atol=0.1))
 
+        # check vine pdf values
+        tstX = pd.DataFrame()
+        tstX['1a'] = [0.01, 0.4, 0.5, 0.6, 0.99, 0.999]
+        tstX['2b'] = [0.01, 0.4, 0.5, 0.6, 0.99, 0.999]
+        tstX['3c'] = [0.01, 0.4, 0.5, 0.6, 0.99, 0.999]
+        tstX['4d'] = [0.01, 0.4, 0.5, 0.6, 0.99, 0.999]
+        tstX['5e'] = [0.01, 0.4, 0.5, 0.6, 0.99, 0.999]
+        pdf_at_tstX = tstVine.vinePdf(tstX)
+        print("PDF at: ", tstX)
+        print(pdf_at_tstX)
+        self.assertTrue(np.all(pdf_at_tstX > 0.0))
+
+        # check vine cdf values
+        cdf_at_tstX = tstVine.vineCdf(tstX)
+        print("CDF at: ", tstX)
+        print(cdf_at_tstX)
+        cdf_tol = 0.05
+        self.assertTrue(cdf_at_tstX[1] > cdf_at_tstX[0])
+        self.assertTrue(np.all(0.0 <= cdf_at_tstX))
+        self.assertTrue(np.all(cdf_at_tstX <= 1.0 + cdf_tol))
+        self.assertAlmostEqual(cdf_at_tstX[-1], 1.0, delta=cdf_tol)
+
 
 if __name__ == "__main__":
     unittest.main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,3 +8,4 @@ networkx>=2.2.0 @@
     matplotlib
     seaborn
     statsmodels
+    quadpy