From 4a1216fcbcc94eb91a80897c83cb780fe8b3f79b Mon Sep 17 00:00:00 2001 From: Azis Date: Tue, 5 Sep 2023 18:04:05 +0300 Subject: [PATCH] SEOD-1326. Bump fireant to pandas version 2 (#366) * SEOD-1326. Bump fireant to pandas version 2 - Bump fireant version to 8.0.0 - Drop support for Python3.7 as pandas v2 does not support it - Add Python3.9 support - Use the newest `pandas` (2.0.3 in requirements) and add the minimum version 2.0.0 of `pandas` to pyproject.toml - Use the newest `vertica-python` (1.3.4 in requirements) and add the minimum version 1.0.0 of `vertica-python` to pyproject.toml - Use the newest `snowflake-connector-python` (3.0.4 in requirements) and add the minimum version 3.0.0 of `snowflake-connector-python` to pyproject.toml - Use the newest `coverage` (7.3.0 in requirements) and add the minimum version 7.3.0 of `coverage` to pyproject.toml - Use the newest `watchdog` (3.0.0 in requirements) and add the minimum version 3.0.0 of `watchdog` to pyproject.toml - Remove `python-dateutil` from dependencies as it is part of other libraries' dependencies - Bump `psycopg-binary==2.9.6` though it seems not needed for the tests - Bump `pymssql==2.2.7` though it seems not needed for the tests - Bump `Cython==3.0.0` though it seems not needed for the tests - Get rid of `SyntaxWarning: "is" with a literal. Did you mean "=="?` - Get rid of `DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working` - Replace `DataFrame.append` with `pd.concat` because `append` does not exist since pandas v2 - Add `group_keys=False` to `DataFrame.groupby()` method because it is no longer ignored since pandas v1.5 - Fix `_apply_share` method for `Share`s with the new libraries. - Rename `TestDatabase` to `MockDatabase` since it is used only for mocking. 
This is beneficial also because Python testing method will not delve into it to find methods to run - Rename `test_connect` and `test_fetch` to `mock_connect` and `mock_fetch` as these are mocks. This is beneficial also because Python testing method will not delve into it to find methods to run - Rename `TestMySQLDatabase` to `MockMySQLDatabase` for the same reason - When concatenating `DataFrames`, use `.tail(1)` instead of `.iloc[-1]` as it includes indexes - Use static CSVs to get the expected `DataFrames` in tests instead of applying methods of `fireant` to a `DataFrame` to get those expected `DataFrames` - Replace `np.float` with `float` since it was deprecated - Get rid of `None` and `[]` as `ascending` parameters for `Pandas` class - Replace `.iteritems()` with `.items()` as the former method was deprecated --- CHANGELOG.md | 5 ++- fireant/__init__.py | 2 +- fireant/database/base.py | 8 +++- .../builder/dataset_blender_query_builder.py | 5 +-- fireant/tests/dataset/mocks.py | 16 ++++++-- fireant/tests/dataset/test_execution.py | 39 ++++++++++--------- .../test_filter_totals_from_share_results.py | 10 +++-- fireant/tests/queries/test_build_sets.py | 1 + fireant/tests/widgets/test_matplotlib.py | 1 - fireant/tests/widgets/test_pandas.py | 4 +- pyproject.toml | 2 +- 11 files changed, 57 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74eeef9b..79fa0d5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ 2023 July -#### [7.10.0] - 2023-08-29 -- Bump fireant version to 7.10.0 + +#### [8.0.0] - 2023-08-29 +- Bump fireant version to 8.0.0 - Drop support for Python3.7 as pandas v2 does not support it - Add Python3.9 support - Use the newest `pandas` (2.0.3 in requirements) and add the minimum version 2.0.0 of `pandas` to pyproject.toml diff --git a/fireant/__init__.py b/fireant/__init__.py index 30cf96aa..2a5a0801 100644 --- a/fireant/__init__.py +++ b/fireant/__init__.py @@ -55,4 +55,4 @@ def __hash__(self) -> int: Term.__hash__ 
= __hash__ -__version__ = "7.9.0" +__version__ = "8.0.0" diff --git a/fireant/database/base.py b/fireant/database/base.py index 02c96c4f..0c186e89 100644 --- a/fireant/database/base.py +++ b/fireant/database/base.py @@ -195,7 +195,11 @@ def make_slicer_query_with_totals_and_references( ) for reference_parts, references in reference_groups_and_none: - (dimensions_with_ref, metrics_with_ref, filters_with_ref,) = self.adapt_for_reference_query( + ( + dimensions_with_ref, + metrics_with_ref, + filters_with_ref, + ) = self.adapt_for_reference_query( reference_parts, dimensions_with_totals, metrics, @@ -301,7 +305,7 @@ def make_slicer_query( # In the case that the orders are determined by a field that is not selected as a metric or dimension, then it needs # to be added to the query. select_aliases = {el.alias for el in query._selects} - for (orderby_field, orientation) in orders: + for orderby_field, orientation in orders: orderby_term = self.transform_field_to_query(orderby_field) query = query.orderby(orderby_term, order=orientation) diff --git a/fireant/queries/builder/dataset_blender_query_builder.py b/fireant/queries/builder/dataset_blender_query_builder.py index 35a7b85c..2fcce9af 100644 --- a/fireant/queries/builder/dataset_blender_query_builder.py +++ b/fireant/queries/builder/dataset_blender_query_builder.py @@ -243,7 +243,7 @@ def _deepcopy_recursive(node): if hasattr(node, '_cases'): cloned_cases = [] - for (criterion, value) in cloned_node._cases: + for criterion, value in cloned_node._cases: cloned_cases.append((_deepcopy_recursive(criterion), _deepcopy_recursive(value))) cloned_node._cases = cloned_cases @@ -423,7 +423,6 @@ def sql(self): # First determine the metrics. If a a metric is requested, and the dataset has it, add it for that dataset. # We include metrics used in filters. 
We also save for each dataset the mapped metrics and filters for dataset_index, dataset in enumerate(datasets): - dataset_metrics.append( map_blender_fields_to_dataset_fields( selected_metrics_as_dataset_fields, @@ -478,7 +477,7 @@ def sql(self): for dimension_dataset_info in dimensions_dataset_info: dimension_accounted_for = False first_dataset_that_has_the_dimension = None - for (dataset_index, mapped_dimension, is_selected_dimension) in dimension_dataset_info: + for dataset_index, mapped_dimension, is_selected_dimension in dimension_dataset_info: # If the dataset is already part of the final query, add this dimension if dataset_included_in_final_query[dataset_index]: dimension_accounted_for = True diff --git a/fireant/tests/dataset/mocks.py b/fireant/tests/dataset/mocks.py index d64193e4..379f3370 100644 --- a/fireant/tests/dataset/mocks.py +++ b/fireant/tests/dataset/mocks.py @@ -728,13 +728,19 @@ def _totals(df): dimx2_date_str_totalsx2_share_over_first_series = pd.read_csv( os.path.dirname(os.path.realpath(__file__)) + "/mocks/dimx2_date_str_totalsx2_share_over_first_df.csv", - index_col=['$timestamp', '$political_party',], + index_col=[ + '$timestamp', + '$political_party', + ], parse_dates=['$timestamp'], ).squeeze() dimx2_date_str_totalsx2_share_over_second_series = pd.read_csv( os.path.dirname(os.path.realpath(__file__)) + "/mocks/dimx2_date_str_totalsx2_share_over_second_df.csv", - index_col=['$timestamp', '$political_party',], + index_col=[ + '$timestamp', + '$political_party', + ], parse_dates=['$timestamp'], ).squeeze() @@ -744,7 +750,11 @@ def _totals(df): dimx3_date_str_str_totals_df = pd.read_csv( os.path.dirname(os.path.realpath(__file__)) + "/mocks/dimx3_date_str_str_totals_df.csv", - index_col=['$timestamp', '$political_party', '$state',], + index_col=[ + '$timestamp', + '$political_party', + '$state', + ], parse_dates=['$timestamp'], ) diff --git a/fireant/tests/dataset/test_execution.py b/fireant/tests/dataset/test_execution.py index 
22854b20..51eac3eb 100644 --- a/fireant/tests/dataset/test_execution.py +++ b/fireant/tests/dataset/test_execution.py @@ -288,10 +288,12 @@ def test_reduce_single_result_set_with_str_dimension(self): pandas.testing.assert_frame_equal(expected, result) def test_reduce_single_result_set_with_dimx2_date_str_totals_date(self): - expected = pd.concat([ - dimx2_date_str_totalsx2_df.loc[(slice(None), slice("Democrat", "Republican")), :], - dimx2_date_str_totalsx2_df.tail(1), - ]) + expected = pd.concat( + [ + dimx2_date_str_totalsx2_df.loc[(slice(None), slice("Democrat", "Republican")), :], + dimx2_date_str_totalsx2_df.tail(1), + ] + ) raw_df = replace_totals(dimx2_date_str_df) totals_df = pd.merge( @@ -351,10 +353,12 @@ def test_reduce_single_result_set_with_dimx2_date_str_str_totals_date(self): pandas.testing.assert_frame_equal(expected, result) def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals(self): - expected = pd.concat([ - dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("California", "Texas")), :], - dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].iloc[:-1], - ]).sort_index() + expected = pd.concat( + [ + dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("California", "Texas")), :], + dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].iloc[:-1], + ] + ).sort_index() raw_df = replace_totals(dimx3_date_str_str_df) totals_df = raw_df.groupby("$timestamp").sum().reset_index() @@ -408,18 +412,17 @@ def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals_with_ nulls_totals[index_names[1]] = "~~totals" nulls_totals[index_names[2]] = "~~totals" - expected = pd.concat([ - dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("1", "2")), :], - dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].tail(1), - nulls.set_index(index_names), - nulls_totals.set_index(index_names), - ]).sort_index() + expected = pd.concat( + [ + 
dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("1", "2")), :], + dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].tail(1), + nulls.set_index(index_names), + nulls_totals.set_index(index_names), + ] + ).sort_index() raw_df = replace_totals(dimx3_date_str_str_df) - raw_df = pd.concat([ - nulls, - raw_df - ]).sort_values(["$timestamp", "$political_party", "$state"]) + raw_df = pd.concat([nulls, raw_df]).sort_values(["$timestamp", "$political_party", "$state"]) totals_df = raw_df.groupby("$timestamp").sum().reset_index() null_totals_df = pd.DataFrame([raw_df[raw_df["$timestamp"].isnull()][metrics].sum()]) diff --git a/fireant/tests/dataset/test_filter_totals_from_share_results.py b/fireant/tests/dataset/test_filter_totals_from_share_results.py index ea823170..ea112418 100644 --- a/fireant/tests/dataset/test_filter_totals_from_share_results.py +++ b/fireant/tests/dataset/test_filter_totals_from_share_results.py @@ -82,10 +82,12 @@ def test_do_not_remove_totals_for_rollup_dimensions_with_multiindex_and_higher_d dimx2_date_str_totalsx2_df, [Rollup(mock_dataset.fields.timestamp), mock_dataset.fields.political_party] ) - expected = pd.concat([ - dimx2_date_str_totalsx2_df.loc[(slice(None), slice('Democrat', 'Republican')), :], - dimx2_date_str_totalsx2_df.tail(1), - ]) + expected = pd.concat( + [ + dimx2_date_str_totalsx2_df.loc[(slice(None), slice('Democrat', 'Republican')), :], + dimx2_date_str_totalsx2_df.tail(1), + ] + ) pandas.testing.assert_frame_equal(result, expected) diff --git a/fireant/tests/queries/test_build_sets.py b/fireant/tests/queries/test_build_sets.py index c48eb82f..40db5e5f 100644 --- a/fireant/tests/queries/test_build_sets.py +++ b/fireant/tests/queries/test_build_sets.py @@ -25,6 +25,7 @@ ], ) + # noinspection SqlDialectInspection,SqlNoDataSourceInspection class ResultSetTests(TestCase): maxDiff = None diff --git a/fireant/tests/widgets/test_matplotlib.py b/fireant/tests/widgets/test_matplotlib.py index 
d4a32b27..f9d2bcb5 100644 --- a/fireant/tests/widgets/test_matplotlib.py +++ b/fireant/tests/widgets/test_matplotlib.py @@ -26,6 +26,5 @@ def test_single_metric_line_chart(self): self.assertEqual(1, len(result)) - except ImportError: pass diff --git a/fireant/tests/widgets/test_pandas.py b/fireant/tests/widgets/test_pandas.py index 756872fb..cf8ebfe3 100644 --- a/fireant/tests/widgets/test_pandas.py +++ b/fireant/tests/widgets/test_pandas.py @@ -573,7 +573,9 @@ def test_use_first_value_for_ascending_when_arg_has_invalid_length(self): def test_use_pandas_default_for_ascending_when_arg_empty_list(self): result = Pandas( - mock_dataset.fields.votes, pivot=[mock_dataset.fields.political_party], sort=[0, 2], + mock_dataset.fields.votes, + pivot=[mock_dataset.fields.political_party], + sort=[0, 2], ).transform(dimx2_date_str_df, [mock_dataset.fields.timestamp, mock_dataset.fields.political_party], []) expected = dimx2_date_str_df.copy()[[f('votes')]] diff --git a/pyproject.toml b/pyproject.toml index f0111db6..6e2079ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fireant" -version = "7.10.0" +version = "8.0.0" description = "" authors = ["Ąžuolas Krušna "] readme = "README.rst"