Skip to content

Commit

Permalink
Merge pull request #6 from sjoshistrats/feature/benchmarks
Browse files Browse the repository at this point in the history
Add benchmarks
  • Loading branch information
sjoshistrats committed Apr 16, 2022
2 parents 4155195 + dcb13e0 commit 2f01b83
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 14 deletions.
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ jobs:
- run:
command: | # Run tests
pipenv install pytest
pipenv install pytest-benchmark
pipenv run pytest python/fastgrouper/test/ -s -vv
workflows:
build_test:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pytest
import numpy as np
from numpy.testing import assert_almost_equal

Expand All @@ -14,37 +15,37 @@ def foobar_op(x, y):
return np.mean(x + y)

def test_arr_grouped():
    """Exercise ``fastgrouper.arr.Grouped`` via ``apply`` and ``apply_expand``.

    Verifies that ``apply`` hands back a numpy array and that positional and
    keyword invocation both match ``EXPECTED_APPLY``; then verifies that
    ``apply_expand`` matches ``EXPECTED_APPLY_EXPAND``.
    """
    grouper = fastgrouper.arr.Grouped(GIDS)

    # apply() with every operand passed positionally
    positional = grouper.apply(foobar_op, XVALS, YVALS)

    # The array flavour of the grouper must return an ndarray
    assert isinstance(positional, np.ndarray)

    assert_almost_equal(positional, np.array(EXPECTED_APPLY))

    # apply() with the second operand passed by keyword
    by_keyword = grouper.apply(foobar_op, XVALS, y=YVALS)
    assert_almost_equal(by_keyword, np.array(EXPECTED_APPLY))

    # apply_expand() with the second operand passed by keyword
    expanded = grouper.apply_expand(foobar_op, XVALS, y=YVALS)
    assert_almost_equal(expanded, np.array(EXPECTED_APPLY_EXPAND))

def test_li_grouped():
    """Check that ``fastgrouper.li.Grouped.apply`` returns a correct list.

    The grouper is built from ``GIDS`` and ``foobar_op`` is applied with one
    positional and one keyword operand. The result must be a plain ``list``
    whose values match ``EXPECTED_APPLY``.
    """
    # Prepare example
    li_grpr = fastgrouper.li.Grouped(GIDS)

    # Check apply with keyword args
    result = li_grpr.apply(foobar_op, XVALS, y=YVALS)

    # Ensure returned result is a list
    assert isinstance(result, list)

    # Check values
    assert_almost_equal(result, EXPECTED_APPLY)
66 changes: 66 additions & 0 deletions python/fastgrouper/test/test_grouped_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import pytest
import pandas as pd
import numpy as np
from numpy.testing import assert_almost_equal

import fastgrouper.arr
import fastgrouper.li

# Create asymmetrical groups
# Two 50000-element operand arrays of evenly spaced values.
XVALS = np.linspace(500, 1200, 50000)
YVALS = np.linspace(-230, 177.3, 50000)
# Group ids: the sequence 0..499 repeated 100 times (50000 entries in total).
GIDS = np.tile(np.arange(500), 100)
# Reassign a few positions to groups 4 and 12 so that the group sizes are
# no longer all equal.
GIDS[[14, 19, 230, 87]] = 4
GIDS[[345, 1270, 63, 1287]] = 12

def beepbop(x, y):
    """Return the smallest absolute value of ``sin(x) + sin(y)``.

    The sines are summed element-wise and the minimum of the absolute sums
    is returned.
    """
    sine_sum = np.sin(x) + np.sin(y)
    magnitudes = np.abs(sine_sum)
    return np.min(magnitudes)

def test_fastgrouper_arr_slice_apply_benchmark(benchmark):
    """Benchmark the apply step of ``fastgrouper.arr.Grouped``.

    The grouper and the result index are built once, before the callable is
    handed to ``benchmark``; only the apply call plus the Series construction
    and sort are inside the benchmarked callable.
    """
    grouper = fastgrouper.arr.Grouped(GIDS)
    gid_index = pd.Index(grouper.dedup_gids, name="gids")

    def run_apply():
        per_group = grouper.apply(beepbop, XVALS, YVALS)
        labelled = pd.Series(per_group, index=gid_index)

        # Sorted so the comparison against the pure pandas benchmark is fairer
        return labelled.sort_index()

    benchmark(run_apply)

def test_pure_pandas_slice_apply_benchmark(benchmark):
    """Benchmark ``GroupBy.apply`` on a pre-built pandas groupby object.

    The DataFrame and its groupby are created before the benchmark call, so
    only ``apply`` itself is passed to ``benchmark``.
    """
    frame = pd.DataFrame({"gids": GIDS, "xvals": XVALS, "yvals": YVALS})
    grouped = frame.groupby("gids")

    def per_group(chunk):
        # Hand beepbop the raw numpy values of each group's columns
        xs = chunk["xvals"].values
        ys = chunk["yvals"].values
        return beepbop(xs, ys)

    benchmark(grouped.apply, per_group)

def test_fastgrouper_all_steps_benchmark(benchmark):
    """Benchmark fastgrouper end to end: grouper construction plus apply.

    Unlike the slice-apply benchmark, the grouper and index are built inside
    the benchmarked callable.
    """
    def run_all_steps():
        grouper = fastgrouper.arr.Grouped(GIDS)
        gid_index = pd.Index(grouper.dedup_gids, name="gids")
        per_group = grouper.apply(beepbop, XVALS, YVALS)
        labelled = pd.Series(per_group, index=gid_index)

        # Sorted so the comparison against the pure pandas benchmark is fairer
        return labelled.sort_index()

    benchmark(run_all_steps)

def test_pure_pandas_all_steps_benchmark(benchmark):
    """Benchmark pandas end to end: ``groupby`` construction plus ``apply``.

    The DataFrame is built once up front; the benchmarked callable performs
    both the ``groupby`` and the ``apply`` on every call.
    """
    df = pd.DataFrame({
        "gids": GIDS,
        "xvals": XVALS,
        "yvals": YVALS
    })

    def apply_fn():
        # groupby is deliberately repeated here so its cost is included
        return df.groupby("gids").apply(lambda r: beepbop(r["xvals"].values, r["yvals"].values))

    benchmark(apply_fn)
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

setuptools.setup(
name="fastgrouper",
version="0.1.1",
version="0.1.2",
author="Shreyas Joshi",
author_email="[email protected]",
description="A package for applying efficient groupby operations.",
Expand All @@ -26,5 +26,8 @@
package_dir={"": "python"},
packages=setuptools.find_packages(where="python"),
python_requires=">=3.6",
install_requires=["numpy", "pandas"]
install_requires=["numpy", "pandas"],
extras_require={
"testing": ["pytest", "pytest-benchmark"]
}
)

0 comments on commit 2f01b83

Please sign in to comment.