Skip to content

Commit

Permalink
Adding memModel Trees and some code correction (#161)
Browse files Browse the repository at this point in the history
* Adding memModel Trees and some code correction

 - memModel Trees (Unit Tests to do)
 - Adding matrix rotation for PCA (tests available)
 - simplifying code for vModel

TO DO (to complete 0.7.0):
 - tests for trees
 - memModel for Naive Bayes (+ tests)

* Adding tests for trees

TO DO:
test for the SQL syntax in vDataFrame

* copyedits

* reword

* Update memmodel.py

Co-authored-by: kxu <[email protected]>
  • Loading branch information
oualib and ansleis authored Aug 27, 2021
1 parent acb71df commit b0c87a5
Show file tree
Hide file tree
Showing 6 changed files with 698 additions and 44 deletions.
411 changes: 376 additions & 35 deletions verticapy/learn/memmodel.py

Large diffs are not rendered by default.

52 changes: 51 additions & 1 deletion verticapy/learn/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@

# Standard Python Modules
import numpy as np
from numpy import eye, asarray, dot, sum, diag
from numpy.linalg import svd
from typing import Union

#
Expand Down Expand Up @@ -635,4 +637,52 @@ def load_model(name: str, cursor=None, input_relation: str = "", test_relation:
model.classes_ = [0, 1]
if model_type in ("svm_classifier", "svm_regressor", "logistic_reg", "linear_reg",):
model.coef_ = model.get_attr("details")
return model
return model

# ---#
# This piece of code was taken from
# https://en.wikipedia.org/wiki/Talk:Varimax_rotation
def matrix_rotation(Phi: Union[list, np.ndarray],
                    gamma: float = 1.0,
                    q: int = 20,
                    tol: float = 1e-6):
    """
---------------------------------------------------------------------------
Performs an Oblimin (Varimax, Quartimax) rotation on the model's
PCA matrix.

Parameters
----------
Phi: list / numpy.array
    Input matrix. It must be two-dimensional (rows x components).
gamma: float, optional
    Oblimin rotation factor, determines the type of rotation.
    It must be between 0.0 and 1.0.
        gamma = 0.0 results in a Quartimax rotation.
        gamma = 1.0 results in a Varimax rotation.
q: int, optional
    Maximum number of iterations.
tol: float, optional
    Convergence tolerance. The algorithm stops as soon as the sum of the
    singular values computed at one iteration is less than (1 + tol) times
    the sum computed at the previous iteration.

Returns
-------
numpy.array
    The rotated matrix, i.e. Phi multiplied by the final rotation matrix.
    """
    # numpy arrays are accepted as documented, not only Python lists.
    check_types([("Phi", Phi, [list, np.ndarray,],),
                 ("gamma", gamma, [int, float,],),
                 ("q", q, [int, float,],),
                 ("tol", tol, [int, float,],),])
    Phi = np.array(Phi)
    p, k = Phi.shape
    # Start from the identity: no rotation applied yet.
    R = eye(k)
    d = 0
    for _ in range(q):
        d_old = d
        Lambda = dot(Phi, R)
        # Column-wise sums of squares of the current loadings, as a diagonal matrix.
        col_scale = diag(diag(dot(Lambda.T, Lambda)))
        target = asarray(Lambda) ** 3 - (gamma / p) * dot(Lambda, col_scale)
        u, s, vh = svd(dot(Phi.T, target))
        # The next rotation is rebuilt from the singular vectors only.
        R = dot(u, vh)
        d = sum(s)
        # The first pass has d_old == 0, so at least two iterations are run
        # (when q allows) before the relative-improvement test can stop the loop.
        if d_old != 0 and d / d_old < 1 + tol:
            break
    return dot(Phi, R)
5 changes: 1 addition & 4 deletions verticapy/learn/vmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
# Standard Python Modules
import os, warnings
import numpy as np
from collections.abc import Iterable
from typing import Union

# VerticaPy Modules
Expand Down Expand Up @@ -2387,10 +2386,8 @@ def to_python(self, name: str = "predict", return_proba: bool = False, return_di
func = "def {}(X):\n\timport numpy as np\n\t".format(name)
if self.type in ("LinearRegression", "LinearSVR", "LogisticRegression", "LinearSVC",):
result = "{} + np.sum(np.array({}) * np.array(X), axis=1)".format(self.coef_["coefficient"][0], self.coef_["coefficient"][1:])
if self.type in ("LogisticRegression",):
if self.type in ("LogisticRegression", "LinearSVC",):
func += f"result = 1 / (1 + np.exp(- ({result})))"
elif self.type in ("LinearSVC",):
func += f"result = 1 - 1 / (1 + np.exp({result}))"
else:
func += "result = " + result
if return_proba and self.type in ("LogisticRegression", "LinearSVC",):
Expand Down
6 changes: 3 additions & 3 deletions verticapy/stats/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,16 +635,16 @@ def het_breuschpagan(
):
"""
---------------------------------------------------------------------------
Breusch-Pagan test for heteroscedasticity.
Uses the Breusch-Pagan to test a model for heteroskedasticity.
Parameters
----------
vdf: vDataFrame
Input vDataFrame.
eps: str
Input residual vcolumn.
Input residual vColumn.
X: list
Exogenous Variables to test the heteroscedasticity on.
The exogenous variables to test.
Returns
-------
Expand Down
Loading

0 comments on commit b0c87a5

Please sign in to comment.