Skip to content

Commit

Permalink
Merge pull request #22 from KIC/feature/0.2.7
Browse files Browse the repository at this point in the history
Feature/0.2.7
  • Loading branch information
KIC authored Aug 23, 2021
2 parents 59b7023 + cb111e7 commit 14fdef8
Show file tree
Hide file tree
Showing 74 changed files with 14,883 additions and 185 deletions.
5 changes: 4 additions & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,10 @@ NOTE! This module is currently stalled as I mainly use pytorch at the moment.
...

### [pandas-ta-quant](pandas-ta-quant/Readme.md)
Technical analysis library
Technical analysis library.

It is a pure Python re-implementation of the famous TA-Lib, with some custom indicators on top, e.g. GARCH.
Use `df.ta.help` to see all possible indicators.
![Ta Plot](./.readme/images/multi_index.png)

### [pandas-ta-quant-plot](pandas-ta-quant-plot/Readme.md)
Expand Down
2 changes: 1 addition & 1 deletion bump_version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ OLD_VERSION=$1
NEW_VERSION=$2

set -e
for f in `fgrep $OLD_VERSION */* -d skip | sed -e's/\s*=\s*/:/g'`
for f in `(fgrep $OLD_VERSION */* -d skip ; fgrep $OLD_VERSION */*/* -d skip) | sed -e's/\s*=\s*/:/g'`
do
array=(${f//:/ })
echo "sed -i -E \"s/(__version__)(\s*=\s*)(${array[2]})/\1\2'$NEW_VERSION'/\" ${array[0]}"
Expand Down
14,415 changes: 14,415 additions & 0 deletions notebooks/blogs/probabilistic.ipynb

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions one-liners.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Collection of handy one-liners for maintaining the repository.
# This file is a cheat sheet: the guard below stops execution immediately, so
# the commands further down are only meant to be copied into a shell by hand.
echo "script is not meant to run"
exit 0

# freeze version of a dedicated package
# - "find ." names the start directory explicitly (omitting it is a GNU-only extension)
# - the pattern is anchored to the whole requirement line so that only the numpy
#   requirement itself is rewritten, not e.g. "numpy-financial" or any line that
#   merely contains the text "numpy"
find . -name requirements.txt -exec sed -i -E -e 's/^numpy([^[:alnum:]_.-].*)?$/numpy==1.20.*/' {} \;
2 changes: 1 addition & 1 deletion pandas-ml-1ntegration-test-private
2 changes: 1 addition & 1 deletion pandas-ml-1ntegration-test/noxfile.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.2.6'
__version__ = '0.2.7'

import os
import shutil
Expand Down
2 changes: 1 addition & 1 deletion pandas-ml-airflow/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Augment pandas DataFrame with methods to fetch time series data for quant finance"""
__version__ = '0.2.6'
__version__ = '0.2.7'

import os

Expand Down
8 changes: 4 additions & 4 deletions pandas-ml-common/dev-requirements.frozen.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
ipykernel==5.5.5
ipykernel==6.2.0
lxml==4.6.3
Markdown==3.3.4
nbconvert==6.0.7
nbconvert==6.1.0
nbformat==5.1.3
requests==2.25.1
requests==2.26.0
scikit-learn==0.24.2
twine==3.4.1
twine==3.4.2
2 changes: 1 addition & 1 deletion pandas-ml-common/noxfile.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.2.6'
__version__ = '0.2.7'

import os
import shutil
Expand Down
2 changes: 1 addition & 1 deletion pandas-ml-common/pandas_ml_common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Augment pandas DataFrame with methods for machine learning"""
__version__ = '0.2.0'
__version__ = '0.2.7'

import logging
from typing import Union, List, Callable, Any
Expand Down
13 changes: 13 additions & 0 deletions pandas-ml-common/pandas_ml_common/utils/serialization_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import traceback

import dill as pickle
import pandas as pd


def serialize(obj, filename):
Expand Down Expand Up @@ -75,3 +76,15 @@ def dict_to_str(d):
from sortedcontainers import SortedDict
return ",".join([f"{k}={v}" for k, v in SortedDict(d).items()])


def df_to_nested_dict(df: pd.DataFrame):
    """Convert a (multi-)indexed DataFrame into nested dictionaries of row records.

    Every index level except the innermost one becomes one layer of dictionary
    keys. The rows below the innermost keyed level are returned as a list of
    ``{column: value}`` records (``DataFrame.to_dict('records')``), i.e. the
    labels of the innermost index level are not part of the result.

    :param df: the frame to convert
    :return: a dict keyed by the labels of index level 0 (recursively for deeper
        levels) if the index is a ``pd.MultiIndex`` with more than one level,
        otherwise the list of row records
    """
    if not (isinstance(df.index, pd.MultiIndex) and df.index.nlevels > 1):
        return df.to_dict('records')

    keys = df.index.get_level_values(0).unique()
    try:
        keys = sorted(keys)
    except TypeError:
        # labels of mixed, non-comparable types (e.g. int and str) can not be
        # sorted, fall back to the order of first appearance instead of failing
        keys = list(keys)

    # df.loc[key] selects all rows below `key` and drops the outermost level
    return {key: df_to_nested_dict(df.loc[key]) for key in keys}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from unittest import TestCase

import pandas as pd
import numpy as np

from pandas_ml_common.utils.serialization_utils import df_to_nested_dict


class TestSerializationUtils(TestCase):
    """Tests for the helpers in ``pandas_ml_common.utils.serialization_utils``."""

    def test_nested_dict(self):
        """A 3-level MultiIndex frame becomes a 2-level nested dict of row records."""
        df = pd.DataFrame({"a": np.random.rand(27)})
        df.index = pd.MultiIndex.from_tuples([(a, b, c) for a in range(3) for b in range(3) for c in range(3)])

        res = df_to_nested_dict(df)

        # the two outer index levels are turned into (sorted) dictionary keys
        self.assertEqual([0, 1, 2], list(res.keys()))
        for a in range(3):
            self.assertEqual([0, 1, 2], list(res[a].keys()))

            for b in range(3):
                # the leaves hold the rows below (a, b) as a list of records
                self.assertEqual(df.loc[(a, b)].to_dict('records'), res[a][b])
2 changes: 1 addition & 1 deletion pandas-ml-common/requirements.frozen.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dill==0.3.4
numpy==1.20.3
pandas==1.2.4
pandas==1.3.2
scikit-learn==0.24.2
sortedcontainers==2.4.0
2 changes: 1 addition & 1 deletion pandas-ml-common/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dill
numpy>=1.20.2
numpy==1.20.*
pandas
scikit-learn
sortedcontainers
2 changes: 1 addition & 1 deletion pandas-ml-common/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Augment pandas DataFrame with methods for machine learning"""
__version__ = '0.2.6'
__version__ = '0.2.7'

import os
import re
Expand Down
2 changes: 2 additions & 0 deletions pandas-ml-quant-rl/pandas_ml_quant_rl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
__version__ = '0.2.7'

from .environments import *
from .model.agent import Agent
2 changes: 1 addition & 1 deletion pandas-ml-quant-rl/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Augment pandas DataFrame with methods to fetch time series data for quant finance"""
__version__ = '0.2.6'
__version__ = '0.2.7'

import os

Expand Down
16 changes: 8 additions & 8 deletions pandas-ml-quant/dev-requirements.frozen.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
ipykernel==5.5.5
jupyter-client==6.1.12
ipykernel==6.2.0
jupyter-client==7.0.1
lxml==4.6.3
Markdown==3.3.4
matplotlib==3.4.2
matplotlib==3.4.3
mplfinance==0.12.7a17
nbconvert==6.0.7
requests==2.25.1
TA-Lib==0.4.20
tox==3.23.1
twine==3.4.1
nbconvert==6.1.0
requests==2.26.0
TA-Lib==0.4.21
tox==3.24.3
twine==3.4.2
2 changes: 1 addition & 1 deletion pandas-ml-quant/noxfile.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.2.6'
__version__ = '0.2.7'

import os
import shutil
Expand Down
2 changes: 1 addition & 1 deletion pandas-ml-quant/pandas_ml_quant/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Augment pandas DataFrame with methods for quant analysis"""
__version__ = '0.2.0'
__version__ = '0.2.7'

import importlib

Expand Down
12 changes: 12 additions & 0 deletions pandas-ml-quant/pandas_ml_quant/empirical.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ def __init__(self, x):
def hist(self, bins='sqrt'):
    """Return the density-normalised histogram of the samples ``self.x``.

    :param bins: bin specification passed on to ``np.histogram``
    :return: the tuple ``(density, bin_edges)`` as returned by ``np.histogram``
    """
    density, bin_edges = np.histogram(self.x, bins=bins, density=True)
    return density, bin_edges

def extreme(self):
    """Return the center of the histogram bin with the highest density.

    The histogram is taken from ``self.hist()`` using its default binning.
    If several bins share the highest density, ``np.argmax`` selects the first one.

    :return: the mid point between the two edges of the densest bin
    """
    density, bin_edges = self.hist()
    peak = np.argmax(density)
    left_edge, right_edge = bin_edges[peak], bin_edges[peak + 1]
    return (left_edge + right_edge) / 2

def std(self):
    """Return the standard deviation of the samples ``self.x`` as computed by ``np.std``."""
    samples = self.x
    return np.std(samples)

def confidence_interval(self, lower: float, upper: float) -> Tuple[float, float]:
# a[i-1] < v <= a[i]
li = np.searchsorted(self.probs, lower, side='right')
Expand All @@ -29,6 +37,10 @@ def get_val(idx, prob):

return get_val(li, lower), get_val(ri, upper)

def confidence_band_width(self, lower: float, upper: float) -> float:
    """Return the width of the confidence interval relative to its upper bound.

    :param lower: lower probability passed on to ``confidence_interval``
    :param upper: upper probability passed on to ``confidence_interval``
    :return: ``(upper_bound - lower_bound) / upper_bound`` of the interval
        returned by ``self.confidence_interval(lower, upper)``
    """
    lower_bound, upper_bound = self.confidence_interval(lower, upper)
    width = upper_bound - lower_bound
    return width / upper_bound

def heat_bar(self, bins=21) -> Tuple[np.ndarray, np.ndarray]:
# return a 2D array [value, mass]
mass, edges = np.histogram(self.x, bins=bins, density=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,16 @@ def calc_scores(self, *args, **kwargs):
dfcdf = self.cdf.to_frame().dropna()
idx = dfcdf.index.intersection(self.label_returns.index)
dfl = self.label_returns.loc[idx]
dfpp = self.cdf[idx].apply(lambda cdf: cdf.extreme())
mean_std = self.cdf[idx].apply(lambda cdf: cdf.std()).mean()
nr_events = len(dfcdf)

direction_correct_ratio = \
(((dfl.values.squeeze() > 0) & (dfpp.values.squeeze() > 0)) | ((dfl.values.squeeze() < 0) & (dfpp.values.squeeze() < 0))).sum() / len(dfl)

corr, _ = pearsonr(dfl.values.flatten(), dfpp.values.flatten())
r2 = r2_score(dfl, dfpp)

tail_events = dfcdf.join(dfl).apply(
lambda x: x.iloc[0].is_tail_event(x.iloc[1], self.left_confidence, self.right_confidence),
axis=1,
Expand All @@ -324,11 +332,22 @@ def calc_scores(self, *args, **kwargs):
axis=1,
result_type='expand').mean()

# how wide is the confidence interval, the smaller the better
band_width = dfcdf.apply(
lambda cdf: cdf.iloc[0].confidence_band_width(self.left_confidence, self.right_confidence),
axis=1,
result_type='expand').mean()

return pd.DataFrame({
"first date": [dfcdf.index[0]],
"last date": [dfcdf.index[-1]],
"events": [nr_events],
"direction correct ratio of extreme": [direction_correct_ratio],
"correlation of extreme": [corr],
"r^2 of extreme": [r2],
"mean(σ)": [mean_std],
f"confidence (exp: {self.expected_confidence:.2f} %)": [1 - tail_events.values.sum().item() / nr_events],
"conf width": [band_width],
"left tail avg. distance %": [np.abs(distance.iloc[0])],
"right tail avg. distance %": [distance.iloc[1]],
"left tail events %": [tail_events.iloc[:, 0].sum() / nr_events],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest import TestCase

from pandas_ml_common.decorator import MultiFrameDecorator
from pandas_ml_quant_test.config import DF_TEST
from pandas_ml_utils import PostProcessedFeaturesAndLabels, Constant, np
from pandas_ml_utils.ml.data.extraction.features_and_labels_extractor import FeaturesWithLabels
Expand Down Expand Up @@ -61,6 +62,24 @@ def test_feature_and_label_post_processing(self):
self.assertEqual((6672, 4), fl.labels.shape)
# FIXME implement chained lagging: self.assertEqual((6674, 3, 5, 2), fl.features_with_required_samples.features._.values.shape)

def test_empty_post_prcessor(self):
    """An empty list of feature post processors must leave the extracted features unchanged."""
    df = DF_TEST.copy()
    features_and_labels = PostProcessedFeaturesAndLabels(
        features=[
            lambda df: df["Close"].ta.log_returns(),
        ],
        feature_post_processor=[],
        labels=[Constant(0)],
    )

    fl: FeaturesWithLabels = df._.extract(features_and_labels)
    f = fl.features_with_required_samples.features

    self.assertEqual((6762, 1), f.shape)

    # the features have to equal the plain log returns of the close prices
    expected = df[["Close"]].ta.log_returns().dropna().values
    np.testing.assert_array_almost_equal(expected, f.values)

def test_post_row_standardisation(self):
df = DF_TEST.copy()

Expand All @@ -81,11 +100,52 @@ def test_post_row_standardisation(self):

f = fl.features_with_required_samples.features

self.assertEqual((6659, 20 * 3), f.shape)
self.assertAlmostEqual(1, f.max(axis=1).values.max())
self.assertAlmostEqual(0, f.min(axis=1).values.max())
self.assertEqual(
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59},
set(f.apply(np.argmax, axis=1).values)
)
)

def test_multiple_features_post_processing(self):
    """Two feature sets, each with its own post processor chain, result in two separate frames."""
    df = DF_TEST.copy()

    feature_sets = (
        [lambda df: df["Close"].ta.log_returns(), lambda df: df["Close"].ta.trix(), lambda df: df["Close"].ta.rsi()],
        [lambda df: df["Close"].ta.rsi()],
    )
    post_processors = (
        [lambda df: df.ta.rnn(20), lambda df: df.ta.normalize_row('minmax01', level=1)],
        [lambda df: df.ta.rnn(10)],
    )

    fl: FeaturesWithLabels = df._.extract(
        PostProcessedFeaturesAndLabels(
            features=feature_sets,
            feature_post_processor=post_processors,
            labels=[Constant(0)],
        )
    )

    f = fl.features_with_required_samples.features
    self.assertIsInstance(f, MultiFrameDecorator)

    a, b = f.frames()

    # test a: 3 features with 20 lags each, every row scaled into [0, 1]
    self.assertEqual((6659, 20 * 3), a.shape)
    self.assertAlmostEqual(1, a.max(axis=1).values.max())
    self.assertAlmostEqual(0, a.min(axis=1).values.max())
    # every one of the 60 columns holds the row maximum at least once
    self.assertEqual(set(range(20 * 3)), set(a.apply(np.argmax, axis=1).values))

    # test b: 1 feature with 10 lags and no further post processing
    self.assertEqual((6659, 10), b.shape)
    expected_last_row = [
        0.580079, 0.561797, 0.501477, 0.58181, 0.716154, 0.789371, 0.762768, 0.74797, 0.687273, 0.666173
    ]
    np.testing.assert_array_almost_equal(b.values[-1], expected_last_row)
2 changes: 2 additions & 0 deletions pandas-ml-quant/pandas_ml_quant_test/test__empirical.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ def test_empirical_probs(self):

np.testing.assert_array_almost_equal(ecdf.heat_bar(6)[0], np.array([1., 6.]))
np.testing.assert_array_almost_equal(ecdf.heat_bar(6)[1], np.array([0.12, 0.24, 0.36, 0.24, 0.12, 0.12]))

self.assertEqual(ecdf.confidence_band_width(0.1, 0.9), (6. - 2.) / 6.)
12 changes: 6 additions & 6 deletions pandas-ml-quant/requirements.frozen.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
arch==4.19
arch==5.0.1
bintrees==2.2.0
colorama==0.4.4
cvxopt==1.2.6
cvxpy==1.1.13
cvxpy==1.1.15
ipywidgets==7.6.3
matplotlib==3.4.2
matplotlib==3.4.3
mgarch-setup-fix==0.2.0
MiniSom==2.2.9
mlxtend==0.18.0
moviepy==1.0.3
numba==0.53.1
numba==0.54.0
numpy==1.20.3
pandas==1.2.4
pandas==1.3.2
pyts==0.11.0
PyWavelets==1.1.1
qpsolvers==1.6.1
seaborn==0.11.1
seaborn==0.11.2
sortedcontainers==2.4.0
2 changes: 1 addition & 1 deletion pandas-ml-quant/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mgarch-setup-fix
mlxtend
moviepy
numba
numpy
numpy==1.20.*
pandas
pyts
PyWavelets
Expand Down
2 changes: 1 addition & 1 deletion pandas-ml-quant/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Augment pandas DataFrame with methods for quant analysis"""
__version__ = '0.2.6'
__version__ = '0.2.7'
import os
import re

Expand Down
2 changes: 1 addition & 1 deletion pandas-ml-utils-tf/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Augment pandas DataFrame with methods for machine learning"""
__version__ = '0.2.6'
__version__ = '0.2.7'

import os
from setuptools import setup, find_packages
Expand Down
Loading

0 comments on commit 14fdef8

Please sign in to comment.