Remove statsmodels, scikit-learn, and other dependencies; add gross leverage to summary stats (#347)

gusgordon · web-flow · commit 1be90ed06bb9 · 2016-11-04T21:58:51.000-04:00
Remove statsmodels, scikit-learn, and other dependencies; add gross leverage to performance stats summary
diff --git a/.travis.yml b/.travis.yml
@@ -24,11 +24,10 @@ before_install:
   - cp pyfolio/tests/matplotlibrc .
 
 install:
-  - conda create -q -n testenv --yes python=$TRAVIS_PYTHON_VERSION ipython pyzmq numpy scipy nose matplotlib pandas Cython patsy statsmodels flake8 scikit-learn seaborn runipy pytables networkx pandas-datareader matplotlib-tests joblib
+  - conda create -q -n testenv --yes python=$TRAVIS_PYTHON_VERSION ipython pyzmq numpy scipy nose matplotlib pandas Cython patsy flake8 seaborn runipy pytables networkx pandas-datareader matplotlib-tests joblib
   - source activate testenv
   - pip install nose_parameterized
   #- pip install --no-deps git+https://github.com/quantopian/zipline
-  - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then conda install --yes mock enum34; fi
   - pip install -e .[bayesian]
 
 before_script:
diff --git a/conda/meta.yaml b/conda/meta.yaml
@@ -17,18 +17,12 @@ requirements:
 
   run:
     - python
-    - funcsigs >=0.4
     - matplotlib >=1.4.0
-    - mock >=1.1.2
     - numpy >=1.9.1
     - pandas >=0.18.0
-    - pyparsing >=2.0.3
-    - python-dateutil >=2.4.2
     - pytz >=2014.10
-    - scikit-learn >=0.15.0
     - scipy >=0.14.0
     - seaborn >=0.6.0
-    - statsmodels >=0.5.0
     - pandas-datareader >=0.2
     - ipython
     - empyrical >=0.2.1
diff --git a/pyfolio/plotting.py b/pyfolio/plotting.py
@@ -26,8 +26,6 @@
 from matplotlib import figure
 from matplotlib.backends.backend_agg import FigureCanvasAgg
 
-from sklearn import preprocessing
-
 from . import utils
 from . import timeseries
 from . import pos
@@ -499,8 +497,8 @@ def plot_perf_stats(returns, factor_returns, ax=None):
     return ax
 
 
-def show_perf_stats(returns, factor_returns, live_start_date=None,
-                    bootstrap=False):
+def show_perf_stats(returns, factor_returns, gross_lev=None,
+                    live_start_date=None, bootstrap=False):
     """Prints some performance metrics of the strategy.
 
     - Shows amount of time the strategy has been run in backtest and
@@ -537,25 +535,35 @@ def show_perf_stats(returns, factor_returns, live_start_date=None,
         returns_backtest = returns[returns.index < live_start_date]
         returns_live = returns[returns.index > live_start_date]
 
+        gross_lev_backtest = None
+        gross_lev_live = None
+        if gross_lev is not None:
+            gross_lev_backtest = gross_lev[gross_lev.index < live_start_date]
+            gross_lev_live = gross_lev[gross_lev.index > live_start_date]
+
         perf_stats_live = perf_func(
             returns_live,
-            factor_returns=factor_returns)
+            factor_returns=factor_returns,
+            gross_lev=gross_lev_live)
 
         perf_stats_all = perf_func(
             returns,
-            factor_returns=factor_returns)
+            factor_returns=factor_returns,
+            gross_lev=gross_lev)
 
         print('Out-of-Sample Months: ' +
               str(int(len(returns_live) / APPROX_BDAYS_PER_MONTH)))
     else:
         returns_backtest = returns
+        gross_lev_backtest = gross_lev
 
     print('Backtest Months: ' +
           str(int(len(returns_backtest) / APPROX_BDAYS_PER_MONTH)))
 
     perf_stats = perf_func(
         returns_backtest,
-        factor_returns=factor_returns)
+        factor_returns=factor_returns,
+        gross_lev=gross_lev_backtest)
 
     if live_start_date is not None:
         perf_stats = pd.concat(OrderedDict([
@@ -1425,8 +1433,7 @@ def plot_daily_volume(returns, transactions, ax=None, **kwargs):
 
 
 def plot_daily_returns_similarity(returns_backtest, returns_live,
-                                  title='', scale_kws=None, ax=None,
-                                  **kwargs):
+                                  title='', ax=None, **kwargs):
     """Plots overlapping distributions of in-sample (backtest) returns
     and out-of-sample (live trading) returns.
 
@@ -1438,8 +1445,6 @@ def plot_daily_returns_similarity(returns_backtest, returns_live,
         Daily returns of the strategy's live trading, noncumulative.
     title : str, optional
         The title to use for the plot.
-    scale_kws : dict, optional
-        Additional arguments passed to preprocessing.scale.
     ax : matplotlib.Axes, optional
         Axes upon which to plot.
     **kwargs, optional
@@ -1454,13 +1459,11 @@ def plot_daily_returns_similarity(returns_backtest, returns_live,
 
     if ax is None:
         ax = plt.gca()
-    if scale_kws is None:
-        scale_kws = {}
 
-    sns.kdeplot(preprocessing.scale(returns_backtest, **scale_kws),
+    sns.kdeplot(utils.standardize_data(returns_backtest),
                 bw='scott', shade=True, label='backtest',
                 color='forestgreen', ax=ax, **kwargs)
-    sns.kdeplot(preprocessing.scale(returns_live, **scale_kws),
+    sns.kdeplot(utils.standardize_data(returns_live),
                 bw='scott', shade=True, label='out-of-sample',
                 color='red', ax=ax, **kwargs)
     ax.set_title(title)
diff --git a/pyfolio/tears.py b/pyfolio/tears.py
@@ -163,6 +163,7 @@ def create_full_tear_sheet(returns,
     create_returns_tear_sheet(
         returns,
         live_start_date=live_start_date,
+        gross_lev=gross_lev,
         cone_std=cone_std,
         benchmark_rets=benchmark_rets,
         bootstrap=bootstrap,
@@ -204,6 +205,7 @@ def create_full_tear_sheet(returns,
 
 @plotting_context
 def create_returns_tear_sheet(returns, live_start_date=None,
+                              gross_lev=None,
                               cone_std=(1.0, 1.5, 2.0),
                               benchmark_rets=None,
                               bootstrap=False,
@@ -256,6 +258,7 @@ def create_returns_tear_sheet(returns, live_start_date=None,
     print('\n')
 
     plotting.show_perf_stats(returns, benchmark_rets,
+                             gross_lev=gross_lev,
                              bootstrap=bootstrap,
                              live_start_date=live_start_date)
 
diff --git a/pyfolio/tests/test_timeseries.py b/pyfolio/tests/test_timeseries.py
@@ -276,33 +276,6 @@ def test_beta(self, returns, benchmark_rets, rolling_window, expected):
             expected)
 
 
-class TestMultifactor(TestCase):
-    simple_rets = pd.Series(
-        [0.1] * 3 + [0] * 497,
-        pd.date_range(
-            '2000-1-1',
-            periods=500,
-            freq='D'))
-    simple_benchmark_rets = pd.DataFrame(
-        pd.Series(
-            [0.03] * 4 + [0] * 496,
-            pd.date_range(
-                '2000-1-1',
-                periods=500,
-                freq='D')),
-        columns=['bm'])
-
-    @parameterized.expand([
-        (simple_rets[:4], simple_benchmark_rets[:4], [2.5000000000000004])
-    ])
-    def test_calc_multifactor(self, returns, factors, expected):
-        self.assertEqual(
-            timeseries.calc_multifactor(
-                returns,
-                factors).values.tolist(),
-            expected)
-
-
 class TestCone(TestCase):
     def test_bootstrap_cone_against_linear_cone_normal_returns(self):
         random_seed = 100
diff --git a/pyfolio/timeseries.py b/pyfolio/timeseries.py
@@ -474,7 +474,7 @@ def common_sense_ratio(returns):
     stats.skew,
     stats.kurtosis,
     tail_ratio,
-    common_sense_ratio,
+    common_sense_ratio
 ]
 
 FACTOR_STAT_FUNCS = [
@@ -554,34 +554,6 @@ def aggregate_returns(returns, convert_to):
     return empyrical.aggregate_returns(returns, convert_to=convert_to)
 
 
-def calc_multifactor(returns, factors):
-    """Computes multiple ordinary least squares linear fits, and returns
-    fit parameters.
-
-    Parameters
-    ----------
-    returns : pd.Series
-        Daily returns of the strategy, noncumulative.
-         - See full explanation in tears.create_full_tear_sheet.
-    factors : pd.Series
-        Secondary sets to fit.
-
-    Returns
-    -------
-    pd.DataFrame
-        Fit parameters.
-
-    """
-
-    import statsmodels.api as sm
-    factors = factors.loc[returns.index]
-    factors = sm.add_constant(factors)
-    factors = factors.dropna(axis=0)
-    results = sm.OLS(returns[factors.index], factors).fit()
-
-    return results.params
-
-
 def rolling_beta(returns, factor_returns,
                  rolling_window=APPROX_BDAYS_PER_MONTH * 6):
     """Determines the rolling beta of a strategy.
@@ -658,7 +630,7 @@ def rolling_fama_french(returns, factor_returns=None,
                         rolling_window=rolling_window)
 
 
-def perf_stats(returns, factor_returns=None):
+def perf_stats(returns, factor_returns=None, gross_lev=None):
     """Calculates various performance metrics of a strategy, for use in
     plotting.show_perf_stats.
 
@@ -671,6 +643,8 @@ def perf_stats(returns, factor_returns=None):
         Daily noncumulative returns of the benchmark.
          - This is in the same style as returns.
         If None, do not compute alpha, beta, and information ratio.
+    gross_lev : pd.Series (optional)
+        Daily gross leverage of the strategy.
 
     Returns
     -------
@@ -680,10 +654,12 @@ def perf_stats(returns, factor_returns=None):
     """
 
     stats = pd.Series()
-
     for stat_func in SIMPLE_STAT_FUNCS:
         stats[stat_func.__name__] = stat_func(returns)
 
+    if gross_lev is not None:
+        stats['mean_gross_leverage'] = gross_lev.mean()
+
     if factor_returns is not None:
         for stat_func in FACTOR_STAT_FUNCS:
             stats[stat_func.__name__] = stat_func(returns,
@@ -692,7 +668,8 @@ def perf_stats(returns, factor_returns=None):
     return stats
 
 
-def perf_stats_bootstrap(returns, factor_returns=None, return_stats=True):
+def perf_stats_bootstrap(returns, factor_returns=None, gross_lev=None,
+                         return_stats=True):
     """Calculates various bootstrapped performance metrics of a strategy.
 
     Parameters
@@ -726,6 +703,10 @@ def perf_stats_bootstrap(returns, factor_returns=None, return_stats=True):
         bootstrap_values[stat_name] = calc_bootstrap(stat_func,
                                                      returns)
 
+    if gross_lev is not None:
+        bootstrap_values['mean_gross_leverage'] = calc_bootstrap(np.mean,
+                                                                 gross_lev)
+
     if factor_returns is not None:
         for stat_func in FACTOR_STAT_FUNCS:
             stat_name = stat_func.__name__
diff --git a/pyfolio/utils.py b/pyfolio/utils.py
@@ -20,6 +20,7 @@
 from os.path import expanduser, join, getmtime, isdir
 import warnings
 
+import numpy as np
 from IPython.display import display
 import pandas as pd
 from pandas.tseries.offsets import BDay
@@ -475,7 +476,8 @@ def get_symbol_rets(symbol, start=None, end=None):
 
 
 def print_table(table, name=None, fmt=None):
-    """Pretty print a pandas DataFrame.
+    """
+    Pretty print a pandas DataFrame.
 
     Uses HTML output if running inside Jupyter Notebook, otherwise
     formatted text output.
@@ -506,3 +508,20 @@ def print_table(table, name=None, fmt=None):
 
     if fmt is not None:
         pd.set_option('display.float_format', prev_option)
+
+
+def standardize_data(x):
+    """
+    Standardize an array to zero mean and unit standard deviation.
+
+    Parameters
+    ----------
+    x : np.array
+        Array to standardize.
+
+    Returns
+    -------
+    np.array
+        Standardized array.
+    """
+    return (x - np.mean(x)) / np.std(x)
diff --git a/setup.py b/setup.py
@@ -38,22 +38,15 @@
                'Operating System :: OS Independent']
 
 install_reqs = [
-    'funcsigs>=0.4',
     'ipython>=3.2.3',
     'matplotlib>=1.4.0',
-    'mock>=1.1.2',
     'numpy>=1.9.1',
     'pandas>=0.18.0',
-    'pyparsing>=2.0.3',
-    'python-dateutil>=2.4.2',
     'pytz>=2014.10',
     'scipy>=0.14.0',
     'seaborn>=0.7.1',
     'pandas-datareader>=0.2',
-    'scikit-learn>=0.17',
-    'empyrical>=0.2.1',
-    'statsmodels>=0.6.1',
-    'jsonschema>=2.5.1',
+    'empyrical>=0.2.1'
 ]
 
 test_reqs = ['nose>=1.3.7', 'nose-parameterized>=0.5.0', 'runipy>=0.1.3']