Skip to content

Commit

Permalink
SEOD-1326. Bump fireant to pandas version 2 (#366)
Browse files Browse the repository at this point in the history
* SEOD-1326. Bump fireant to pandas version 2

- Bump fireant version to 8.0.0
- Drop support for Python3.7 as pandas v2 does not support it
- Add Python3.9 support
- Use the newest `pandas` (2.0.3 in requirements) and add the minimum version 2.0.0 of `pandas` to pyproject.toml
- Use the newest `vertica-python` (1.3.4 in requirements) and add the minimum version 1.0.0 of `vertica-python` to pyproject.toml
- Use the newest `snowflake-connector-python` (3.0.4 in requirements) and add the minimum version 3.0.0 of `snowflake-connector-python` to pyproject.toml
- Use the newest `coverage` (7.3.0 in requirements) and add the minimum version 7.3.0 of `coverage` to pyproject.toml
- Use the newest `watchdog` (3.0.0 in requirements) and add the minimum version 3.0.0 of `watchdog` to pyproject.toml
- Remove `python-dateutil` from dependencies as it is part of other libraries' dependencies
- Bump `psycopg-binary==2.9.6` though it seems not needed for the tests
- Bump `pymssql==2.2.7` though it seems not needed for the tests
- Bump `Cython==3.0.0` though it seems not needed for the tests
- Get rid of `SyntaxWarning: "is" with a literal. Did you mean "=="?`
- Get rid of `DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working`
- Replace `DataFrame.append` with `pd.concat` because `append` does not exist since pandas v2
- Add `group_keys=False` to `DataFrame.groupby()` method because it is no longer ignored since pandas v1.5
- Fix `_apply_share` method for `Share`s with the new libraries.
- Rename `TestDatabase` to `MockDatabase` since it is used only for mocking. This is beneficial also because Python testing method will not delve into it to find methods to run
- Rename `test_connect` and `test_fetch` to `mock_connect` and `mock_fetch` as these are mocks. This is beneficial also because Python testing method will not delve into it to find methods to run
- Rename `TestMySQLDatabase` to `MockMySQLDatabase` for the same reason
- When concatenating `DataFrames`, use `.tail(1)` instead of `.iloc[-1]` as it includes indexes
- Use static CSVs to get the expected `DataFrames` in tests instead of applying methods of `fireant` to a `DataFrame` to get those expected `DataFrames`
- Replace `np.float` with `float` since it was deprecated
- Get rid of `None` and `[]` as `ascending` parameters for `Pandas` class
- Replace `.iteritems()` with `.items()` as the former method was deprecated
  • Loading branch information
AzisK committed Sep 5, 2023
1 parent f10a5c0 commit 4a1216f
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 36 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
2023 July

#### [7.10.0] - 2023-08-29
- Bump fireant version to 7.10.0

#### [8.0.0] - 2023-08-29
- Bump fireant version to 8.0.0
- Drop support for Python3.7 as pandas v2 does not support it
- Add Python3.9 support
- Use the newest `pandas` (2.0.3 in requirements) and add the minimum version 2.0.0 of `pandas` to pyproject.toml
Expand Down
2 changes: 1 addition & 1 deletion fireant/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ def __hash__(self) -> int:
Term.__hash__ = __hash__


__version__ = "7.9.0"
__version__ = "8.0.0"
8 changes: 6 additions & 2 deletions fireant/database/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,11 @@ def make_slicer_query_with_totals_and_references(
)

for reference_parts, references in reference_groups_and_none:
(dimensions_with_ref, metrics_with_ref, filters_with_ref,) = self.adapt_for_reference_query(
(
dimensions_with_ref,
metrics_with_ref,
filters_with_ref,
) = self.adapt_for_reference_query(
reference_parts,
dimensions_with_totals,
metrics,
Expand Down Expand Up @@ -301,7 +305,7 @@ def make_slicer_query(
# In the case that the orders are determined by a field that is not selected as a metric or dimension, then it needs
# to be added to the query.
select_aliases = {el.alias for el in query._selects}
for (orderby_field, orientation) in orders:
for orderby_field, orientation in orders:
orderby_term = self.transform_field_to_query(orderby_field)
query = query.orderby(orderby_term, order=orientation)

Expand Down
5 changes: 2 additions & 3 deletions fireant/queries/builder/dataset_blender_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def _deepcopy_recursive(node):
if hasattr(node, '_cases'):
cloned_cases = []

for (criterion, value) in cloned_node._cases:
for criterion, value in cloned_node._cases:
cloned_cases.append((_deepcopy_recursive(criterion), _deepcopy_recursive(value)))

cloned_node._cases = cloned_cases
Expand Down Expand Up @@ -423,7 +423,6 @@ def sql(self):
# First determine the metrics. If a a metric is requested, and the dataset has it, add it for that dataset.
# We include metrics used in filters. We also save for each dataset the mapped metrics and filters
for dataset_index, dataset in enumerate(datasets):

dataset_metrics.append(
map_blender_fields_to_dataset_fields(
selected_metrics_as_dataset_fields,
Expand Down Expand Up @@ -478,7 +477,7 @@ def sql(self):
for dimension_dataset_info in dimensions_dataset_info:
dimension_accounted_for = False
first_dataset_that_has_the_dimension = None
for (dataset_index, mapped_dimension, is_selected_dimension) in dimension_dataset_info:
for dataset_index, mapped_dimension, is_selected_dimension in dimension_dataset_info:
# If the dataset is already part of the final query, add this dimension
if dataset_included_in_final_query[dataset_index]:
dimension_accounted_for = True
Expand Down
16 changes: 13 additions & 3 deletions fireant/tests/dataset/mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,13 +728,19 @@ def _totals(df):

dimx2_date_str_totalsx2_share_over_first_series = pd.read_csv(
os.path.dirname(os.path.realpath(__file__)) + "/mocks/dimx2_date_str_totalsx2_share_over_first_df.csv",
index_col=['$timestamp', '$political_party',],
index_col=[
'$timestamp',
'$political_party',
],
parse_dates=['$timestamp'],
).squeeze()

dimx2_date_str_totalsx2_share_over_second_series = pd.read_csv(
os.path.dirname(os.path.realpath(__file__)) + "/mocks/dimx2_date_str_totalsx2_share_over_second_df.csv",
index_col=['$timestamp', '$political_party',],
index_col=[
'$timestamp',
'$political_party',
],
parse_dates=['$timestamp'],
).squeeze()

Expand All @@ -744,7 +750,11 @@ def _totals(df):

dimx3_date_str_str_totals_df = pd.read_csv(
os.path.dirname(os.path.realpath(__file__)) + "/mocks/dimx3_date_str_str_totals_df.csv",
index_col=['$timestamp', '$political_party', '$state',],
index_col=[
'$timestamp',
'$political_party',
'$state',
],
parse_dates=['$timestamp'],
)

Expand Down
39 changes: 21 additions & 18 deletions fireant/tests/dataset/test_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,12 @@ def test_reduce_single_result_set_with_str_dimension(self):
pandas.testing.assert_frame_equal(expected, result)

def test_reduce_single_result_set_with_dimx2_date_str_totals_date(self):
expected = pd.concat([
dimx2_date_str_totalsx2_df.loc[(slice(None), slice("Democrat", "Republican")), :],
dimx2_date_str_totalsx2_df.tail(1),
])
expected = pd.concat(
[
dimx2_date_str_totalsx2_df.loc[(slice(None), slice("Democrat", "Republican")), :],
dimx2_date_str_totalsx2_df.tail(1),
]
)

raw_df = replace_totals(dimx2_date_str_df)
totals_df = pd.merge(
Expand Down Expand Up @@ -351,10 +353,12 @@ def test_reduce_single_result_set_with_dimx2_date_str_str_totals_date(self):
pandas.testing.assert_frame_equal(expected, result)

def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals(self):
expected = pd.concat([
dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("California", "Texas")), :],
dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].iloc[:-1],
]).sort_index()
expected = pd.concat(
[
dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("California", "Texas")), :],
dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].iloc[:-1],
]
).sort_index()

raw_df = replace_totals(dimx3_date_str_str_df)
totals_df = raw_df.groupby("$timestamp").sum().reset_index()
Expand Down Expand Up @@ -408,18 +412,17 @@ def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals_with_
nulls_totals[index_names[1]] = "~~totals"
nulls_totals[index_names[2]] = "~~totals"

expected = pd.concat([
dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("1", "2")), :],
dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].tail(1),
nulls.set_index(index_names),
nulls_totals.set_index(index_names),
]).sort_index()
expected = pd.concat(
[
dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("1", "2")), :],
dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].tail(1),
nulls.set_index(index_names),
nulls_totals.set_index(index_names),
]
).sort_index()

raw_df = replace_totals(dimx3_date_str_str_df)
raw_df = pd.concat([
nulls,
raw_df
]).sort_values(["$timestamp", "$political_party", "$state"])
raw_df = pd.concat([nulls, raw_df]).sort_values(["$timestamp", "$political_party", "$state"])

totals_df = raw_df.groupby("$timestamp").sum().reset_index()
null_totals_df = pd.DataFrame([raw_df[raw_df["$timestamp"].isnull()][metrics].sum()])
Expand Down
10 changes: 6 additions & 4 deletions fireant/tests/dataset/test_filter_totals_from_share_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,12 @@ def test_do_not_remove_totals_for_rollup_dimensions_with_multiindex_and_higher_d
dimx2_date_str_totalsx2_df, [Rollup(mock_dataset.fields.timestamp), mock_dataset.fields.political_party]
)

expected = pd.concat([
dimx2_date_str_totalsx2_df.loc[(slice(None), slice('Democrat', 'Republican')), :],
dimx2_date_str_totalsx2_df.tail(1),
])
expected = pd.concat(
[
dimx2_date_str_totalsx2_df.loc[(slice(None), slice('Democrat', 'Republican')), :],
dimx2_date_str_totalsx2_df.tail(1),
]
)

pandas.testing.assert_frame_equal(result, expected)

Expand Down
1 change: 1 addition & 0 deletions fireant/tests/queries/test_build_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
],
)


# noinspection SqlDialectInspection,SqlNoDataSourceInspection
class ResultSetTests(TestCase):
maxDiff = None
Expand Down
1 change: 0 additions & 1 deletion fireant/tests/widgets/test_matplotlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,5 @@ def test_single_metric_line_chart(self):

self.assertEqual(1, len(result))


except ImportError:
pass
4 changes: 3 additions & 1 deletion fireant/tests/widgets/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,9 @@ def test_use_first_value_for_ascending_when_arg_has_invalid_length(self):

def test_use_pandas_default_for_ascending_when_arg_empty_list(self):
result = Pandas(
mock_dataset.fields.votes, pivot=[mock_dataset.fields.political_party], sort=[0, 2],
mock_dataset.fields.votes,
pivot=[mock_dataset.fields.political_party],
sort=[0, 2],
).transform(dimx2_date_str_df, [mock_dataset.fields.timestamp, mock_dataset.fields.political_party], [])

expected = dimx2_date_str_df.copy()[[f('votes')]]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fireant"
version = "7.10.0"
version = "8.0.0"
description = ""
authors = ["Ąžuolas Krušna <[email protected]>"]
readme = "README.rst"
Expand Down

0 comments on commit 4a1216f

Please sign in to comment.