Small corrections (#72)
* Small corrections

Small corrections:
 - set_parameters
 - some tests
 - udf

* Update test_dummy_tree_classifier.py

* Skip shap-dependent tests due to a problem with the shap installation

* Some corrections + new tests

 - New tests for PCA, SVD, Normalizer & OneHotEncoder
 - Corrected SVD to_sklearn
 - Corrected regression_report

* Update test_linear_regression.py

* Small changes

 - Added deployInverseSQL
 - Model cleaning

* Update test_normalizer.py

Added model2.drop() at line 177 and model3.drop() at line 197.

Co-authored-by: Arash Fard <[email protected]>
oualib and afard authored Jan 20, 2021
1 parent b0e800d commit 02b7d64
Showing 19 changed files with 1,032 additions and 95 deletions.
5 changes: 3 additions & 2 deletions verticapy/geo.py
@@ -100,8 +100,9 @@ def create_index(
Returns
-------
vDataFrame
object result of the join.
tablesample
An object containing the result. For more information, see
utilities.tablesample.
"""
check_types(
[
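The geo.py hunk above only corrects the documented return type of create_index: the function returns a tablesample (see utilities.tablesample), not a vDataFrame. A minimal, hypothetical caller-side sketch of what that means in practice; the connection details, table, and parameter names (gid, g, index) are illustrative assumptions, not taken from this commit:

# Illustrative sketch only: connection details, table, and parameter names are placeholders.
import vertica_python
from verticapy import vDataFrame
from verticapy.geo import create_index

cur = vertica_python.connect(host="localhost", port=5433,
                             user="dbadmin", database="demo").cursor()
cities = vDataFrame("public.cities", cursor=cur)  # table with an id and a GEOMETRY column
result = create_index(cities, gid="gid", g="geom", index="cities_index")

# Per the updated docstring, result is a tablesample describing the index,
# not a vDataFrame, so it is inspected rather than chained into further queries.
print(result)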
9 changes: 6 additions & 3 deletions verticapy/learn/linear_model.py
@@ -163,7 +163,8 @@ def __init__(
}
)
for elem in ["l1_ratio"]:
del self.parameters[elem]
if elem in self.parameters:
del self.parameters[elem]
cursor = check_cursor(cursor)[0]
self.cursor = cursor
version(cursor=cursor, condition=[8, 0, 0])
@@ -214,7 +215,8 @@ def __init__(
}
)
for elem in ["l1_ratio", "C"]:
del self.parameters[elem]
if elem in self.parameters:
del self.parameters[elem]
cursor = check_cursor(cursor)[0]
self.cursor = cursor
version(cursor=cursor, condition=[8, 0, 0])
@@ -342,7 +344,8 @@ def __init__(
}
)
for elem in ["l1_ratio"]:
del self.parameters[elem]
if elem in self.parameters:
del self.parameters[elem]
cursor = check_cursor(cursor)[0]
self.cursor = cursor
version(cursor=cursor, condition=[8, 0, 0])
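All three linear_model.py hunks apply the same fix: a parameter key is deleted only when it is actually present, so model classes whose parameter dictionaries never contained l1_ratio or C no longer raise KeyError during __init__. A standalone sketch of the idiom, with illustrative dictionary contents:

# Illustrative only: guarded deletion of optional keys, mirroring the hunks above.
parameters = {"tol": 1e-4, "max_iter": 100, "solver": "newton"}  # no "l1_ratio" or "C"

for elem in ["l1_ratio", "C"]:
    if elem in parameters:  # without this check, a missing key raises KeyError
        del parameters[elem]

print(parameters)  # {'tol': 0.0001, 'max_iter': 100, 'solver': 'newton'}

An equivalent shorthand is parameters.pop(elem, None), which also tolerates missing keys.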
7 changes: 5 additions & 2 deletions verticapy/learn/pipeline.py
@@ -60,7 +60,7 @@ class Pipeline:
"""
---------------------------------------------------------------------------
Creates a Pipeline object. Sequentially apply a list of transforms and a
final estimator.
final estimator. The intermediate steps must implement a transform method.
Parameters
----------
@@ -216,7 +216,10 @@ def predict(
current_vdf = vdf
for idx, step in enumerate(self.steps):
if idx == len(self.steps) - 1:
current_vdf = step[1].predict(current_vdf, X_new, name = name)
try:
current_vdf = step[1].predict(current_vdf, X_new, name = name, inplace = False)
except:
current_vdf = step[1].predict(current_vdf, X_new, name = name)
else:
current_vdf = step[1].transform(current_vdf, X_new)
X_new = step[1].get_names(X = X)
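The predict hunk above makes the final estimator call defensive: the newer predict signature is tried first with inplace = False, and the older signature is used as a fallback when that keyword is rejected. A hypothetical usage sketch of such a pipeline; the model names, columns, connection details, and the fit signature are assumptions for illustration, not taken from this commit:

# Sketch only: every intermediate step must expose transform(); only the last step predicts.
import vertica_python
from verticapy import vDataFrame
from verticapy.learn.pipeline import Pipeline
from verticapy.learn.preprocessing import Normalizer
from verticapy.learn.linear_model import LinearRegression

cur = vertica_python.connect(host="localhost", port=5433,
                             user="dbadmin", database="demo").cursor()
pipe = Pipeline([
    ("scaler", Normalizer("pipe_norm", cursor=cur)),    # transform step
    ("lr", LinearRegression("pipe_lr", cursor=cur)),    # final estimator
])
pipe.fit("public.winequality", ["alcohol", "residual_sugar"], "quality")

# predict applies each transform in order, then calls the estimator's predict;
# per the hunk above, inplace = False is tried first, with the older call as a fallback.
result = pipe.predict(vDataFrame("public.winequality", cursor=cur), name="quality_pred")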
337 changes: 256 additions & 81 deletions verticapy/learn/vmodel.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion verticapy/tests/vModel/test_decision_tree_regressor.py
@@ -142,7 +142,7 @@ def test_get_params(self, model):
def test_get_plot(self):
pass

@pytest.mark.xfail(reason = "vmodel.py, line 1725, in to_sklearn: local variable 'model' referenced before assignment")
@pytest.mark.xfail(reason = "pb with sklearn trees")
def test_to_sklearn(self, base):
base.cursor.execute("DROP MODEL IF EXISTS tr_model_sk_test")

2 changes: 1 addition & 1 deletion verticapy/tests/vModel/test_dummy_tree_classifier.py
@@ -275,7 +275,7 @@ def test_set_cursor(self):

def test_set_params(self, model):
model.set_params({"nbins": 100})

# Nothing will change as Dummy Trees have no parameters
assert model.get_params()["nbins"] == 100

def test_model_from_vDF(self, base, dtc_data_vd):
6 changes: 3 additions & 3 deletions verticapy/tests/vModel/test_dummy_tree_regressor.py
@@ -140,7 +140,7 @@ def test_get_params(self, model):
def test_get_plot(self):
pass

@pytest.mark.xfail(reason = "vmodel.py, line 1725, in to_sklearn: local variable 'model' referenced before assignment")
@pytest.mark.xfail(reason = "pb with sklearn trees")
def test_to_sklearn(self, base):
base.cursor.execute("DROP MODEL IF EXISTS tr_model_sk_test")

@@ -253,9 +253,9 @@ def test_set_cursor(self, base):
model_test.drop()

def test_set_params(self, model):
# Nothing will change as Dummy Trees have no parameters
model.set_params({"max_features": 100})

assert model.get_params()["max_features"] == 100
assert model.get_params()["max_features"] == "max"

def test_model_from_vDF(self, base, tr_data_vd):
base.cursor.execute("DROP MODEL IF EXISTS tr_from_vDF")
1 change: 1 addition & 0 deletions verticapy/tests/vModel/test_elastic_net.py
@@ -148,6 +148,7 @@ def test_to_sklearn(self, model):
prediction = model.cursor.fetchone()[0]
assert prediction == pytest.approx(md.predict([[3.0, 11.0, 93.0]])[0][0])

@pytest.mark.skip(reason="problem with shap installation")
def test_shapExplainer(self, model):
explainer = model.shapExplainer()
assert explainer.expected_value[0] == pytest.approx(5.81837771)
1 change: 1 addition & 0 deletions verticapy/tests/vModel/test_lasso.py
@@ -147,6 +147,7 @@ def test_to_sklearn(self, model):
prediction = model.cursor.fetchone()[0]
assert prediction == pytest.approx(md.predict([[3.0, 11.0, 93.0]])[0][0])

@pytest.mark.skip(reason="problem with shap installation")
def test_shapExplainer(self, model):
explainer = model.shapExplainer()
assert explainer.expected_value[0] == pytest.approx(5.81837771)
1 change: 1 addition & 0 deletions verticapy/tests/vModel/test_linear_regression.py
@@ -145,6 +145,7 @@ def test_to_sklearn(self, model):
prediction = model.cursor.fetchone()[0]
assert prediction == pytest.approx(md.predict([[3.0, 11.0, 93.0]])[0][0])

@pytest.mark.skip(reason="problem with shap installation")
def test_shapExplainer(self, model):
explainer = model.shapExplainer()
assert explainer.expected_value[0] == pytest.approx(5.81837771)
1 change: 1 addition & 0 deletions verticapy/tests/vModel/test_linear_svc.py
@@ -133,6 +133,7 @@ def test_to_sklearn(self, model):

# 'LinearSVC' object (md) has no attribute 'predict_proba'

@pytest.mark.skip(reason="problem with shap installation")
def test_shapExplainer(self, model):
explainer = model.shapExplainer()
assert explainer.expected_value[0] == pytest.approx(-0.22667938806360247)
1 change: 1 addition & 0 deletions verticapy/tests/vModel/test_linear_svr.py
@@ -138,6 +138,7 @@ def test_to_sklearn(self, model):
prediction = model.cursor.fetchone()[0]
assert prediction == pytest.approx(md.predict([[3.0, 11.0, 93.0]])[0][0])

@pytest.mark.skip(reason="problem with shap installation")
def test_shapExplainer(self, model):
explainer = model.shapExplainer()
assert explainer.expected_value[0] == pytest.approx(5.819113657580594)
2 changes: 1 addition & 1 deletion verticapy/tests/vModel/test_logistic_regression.py
@@ -141,7 +141,7 @@ def test_to_sklearn(self, model):
prediction = model.cursor.fetchone()[0]
assert prediction == pytest.approx(md.predict_proba([[11.0, 1993.0]])[0][1])

@pytest.mark.skip(reason="shap doesn't want to work on python3.6")
@pytest.mark.skip(reason="problem with shap installation")
def test_shapExplainer(self, model):
explainer = model.shapExplainer()
assert explainer.expected_value[0] == pytest.approx(-0.4617437138350809)
241 changes: 241 additions & 0 deletions verticapy/tests/vModel/test_normalizer.py
@@ -0,0 +1,241 @@
# (c) Copyright [2018-2020] Micro Focus or one of its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest, warnings, sys
from verticapy.learn.preprocessing import Normalizer
from verticapy import drop_table

from verticapy import set_option

set_option("print_info", False)


@pytest.fixture(scope="module")
def winequality_vd(base):
from verticapy.learn.datasets import load_winequality

winequality = load_winequality(cursor=base.cursor)
yield winequality
with warnings.catch_warnings(record=True) as w:
drop_table(name="public.winequality", cursor=base.cursor)


@pytest.fixture(scope="module")
def model(base, winequality_vd):
base.cursor.execute("DROP MODEL IF EXISTS norm_model_test")
model_class = Normalizer("norm_model_test", cursor=base.cursor)
model_class.fit(
"public.winequality", ["citric_acid", "residual_sugar", "alcohol"]
)
yield model_class
model_class.drop()


class TestNormalizer:

def test_deploySQL(self, model):
expected_sql = 'APPLY_NORMALIZE("citric_acid", "residual_sugar", "alcohol" USING PARAMETERS model_name = \'norm_model_test\', match_by_pos = \'true\')'
result_sql = model.deploySQL()

assert result_sql == expected_sql

def test_deployInverseSQL(self, model):
expected_sql = "REVERSE_NORMALIZE(\"citric_acid\", \"residual_sugar\", \"alcohol\" USING PARAMETERS model_name = 'norm_model_test', match_by_pos = 'true')"
result_sql = model.deployInverseSQL()

assert result_sql == expected_sql

def test_drop(self, base):
base.cursor.execute("DROP MODEL IF EXISTS norm_model_test_drop")
model_test = Normalizer("norm_model_test_drop", cursor=base.cursor)
model_test.fit("public.winequality", ["alcohol", "quality"])

base.cursor.execute(
"SELECT model_name FROM models WHERE model_name = 'norm_model_test_drop'"
)
assert base.cursor.fetchone()[0] == "norm_model_test_drop"

model_test.drop()
base.cursor.execute(
"SELECT model_name FROM models WHERE model_name = 'norm_model_test_drop'"
)
assert base.cursor.fetchone() is None

def test_get_attr(self, model):
m_att = model.get_attr()

assert m_att["attr_name"] == [
"details",
]
assert m_att["attr_fields"] == [
"column_name, avg, std_dev",
]
assert m_att["#_of_rows"] == [3]

m_att_details = model.get_attr(attr_name="details")

assert m_att_details["column_name"] == [
"citric_acid",
"residual_sugar",
"alcohol",
]
assert m_att_details["avg"][0] == pytest.approx(0.318633215330152, abs=1e-6)
assert m_att_details["avg"][1] == pytest.approx(5.44323533938741, abs=1e-6)
assert m_att_details["avg"][2] == pytest.approx(10.4918008311528, abs=1e-6)
assert m_att_details["std_dev"][0] == pytest.approx(0.145317864897592, abs=1e-6)
assert m_att_details["std_dev"][1] == pytest.approx(4.75780374314742, abs=1e-6)
assert m_att_details["std_dev"][2] == pytest.approx(1.192711748871)

def test_get_params(self, model):
assert model.get_params() == {'method': 'zscore'}

def test_to_sklearn(self, model):
# Zscore
md = model.to_sklearn()
model.cursor.execute(
"SELECT APPLY_NORMALIZE(citric_acid, residual_sugar, alcohol USING PARAMETERS model_name = '{}', match_by_pos=True) FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, 93. AS alcohol) x".format(
model.name
)
)
prediction = model.cursor.fetchone()[0]
assert prediction == pytest.approx(md.transform([[3.0, 11.0, 93.0]])[0][0])
# Minmax
model2 = Normalizer("norm_model_test2", cursor=model.cursor, method = "minmax")
model2.drop()
model2.fit(
"public.winequality", ["citric_acid", "residual_sugar", "alcohol"]
)
md = model2.to_sklearn()
model2.cursor.execute(
"SELECT APPLY_NORMALIZE(citric_acid, residual_sugar, alcohol USING PARAMETERS model_name = '{}', match_by_pos=True) FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, 93. AS alcohol) x".format(
model2.name
)
)
prediction = model2.cursor.fetchone()[0]
model2.drop()
assert prediction == pytest.approx(md.transform([[3.0, 11.0, 93.0]])[0][0])
# Robust Zscore
model3 = Normalizer("norm_model_test2", cursor=model.cursor, method = "robust_zscore")
model3.drop()
model3.fit(
"public.winequality", ["citric_acid", "residual_sugar", "alcohol"]
)
md = model3.to_sklearn()
model3.cursor.execute(
"SELECT APPLY_NORMALIZE(citric_acid, residual_sugar, alcohol USING PARAMETERS model_name = '{}', match_by_pos=True) FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, 93. AS alcohol) x".format(
model3.name
)
)
prediction = model3.cursor.fetchone()[0]
model3.drop()
assert prediction == pytest.approx(md.transform([[3.0, 11.0, 93.0]])[0][0])

def test_get_transform(self, winequality_vd, model):
# Zscore
winequality_trans = model.transform(
winequality_vd,
X=["citric_acid", "residual_sugar", "alcohol"]
)
assert winequality_trans["citric_acid"].mean() == pytest.approx(
0.0, abs=1e-6
)
assert winequality_trans["residual_sugar"].mean() == pytest.approx(
0.0, abs=1e-6
)
assert winequality_trans["alcohol"].mean() == pytest.approx(
0.0, abs=1e-6
)
# Minmax
model2 = Normalizer("norm_model_test2", cursor=model.cursor, method = "minmax")
model2.drop()
model2.fit(
"public.winequality", ["citric_acid", "residual_sugar", "alcohol"]
)
winequality_trans = model2.transform(
winequality_vd,
X=["citric_acid", "residual_sugar", "alcohol"]
)
assert winequality_trans["citric_acid"].min() == pytest.approx(
0.0, abs=1e-6
)
assert winequality_trans["residual_sugar"].max() == pytest.approx(
1.0, abs=1e-6
)
assert winequality_trans["alcohol"].min() == pytest.approx(
0.0, abs=1e-6
)
model2.drop()
# Robust Zscore
model3 = Normalizer("norm_model_test2", cursor=model.cursor, method = "robust_zscore")
model3.drop()
model3.fit(
"public.winequality", ["citric_acid", "residual_sugar", "alcohol"]
)
winequality_trans = model3.transform(
winequality_vd,
X=["citric_acid", "residual_sugar", "alcohol"]
)
assert winequality_trans["citric_acid"].median() == pytest.approx(
0.0, abs=1e-6
)
assert winequality_trans["residual_sugar"].median() == pytest.approx(
0.0, abs=1e-6
)
assert winequality_trans["alcohol"].median() == pytest.approx(
0.0, abs=1e-6
)
model3.drop()

def test_get_inverse_transform(self, winequality_vd, model):
winequality_trans = model.inverse_transform(
winequality_vd,
X=["citric_acid", "residual_sugar", "alcohol"]
)
assert winequality_trans["citric_acid"].mean() == pytest.approx(
0.364936313867385, abs=1e-6
)
assert winequality_trans["residual_sugar"].mean() == pytest.approx(
31.3410808119571, abs=1e-6
)
assert winequality_trans["alcohol"].mean() == pytest.approx(
23.0054949492833, abs=1e-6
)

def test_set_cursor(self, base):
model_test = Normalizer("norm_cursor_test", cursor=base.cursor)
# TODO: creat a new cursor
model_test.set_cursor(base.cursor)
model_test.drop()
model_test.fit("public.winequality", ["alcohol"])

base.cursor.execute(
"SELECT model_name FROM models WHERE model_name = 'norm_cursor_test'"
)
assert base.cursor.fetchone()[0] == "norm_cursor_test"
model_test.drop()

def test_set_params(self, model):
model.set_params({"method": "robust_zscore"})
assert model.get_params()["method"] == "robust_zscore"
model.set_params({"method": "zscore"})
assert model.get_params()["method"] == "zscore"

def test_model_from_vDF(self, base, winequality_vd):
base.cursor.execute("DROP MODEL IF EXISTS norm_vDF")
model_test = Normalizer("norm_vDF", cursor=base.cursor)
model_test.fit(winequality_vd, ["alcohol", "quality"])
base.cursor.execute(
"SELECT model_name FROM models WHERE model_name = 'norm_vDF'"
)
assert base.cursor.fetchone()[0] == "norm_vDF"
model_test.drop()