diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2b35abd45a96f..38755aef32b85 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -999,6 +999,7 @@ Datetimelike - Bug in comparison between objects with pyarrow date dtype and ``timestamp[pyarrow]`` or ``np.datetime64`` dtype failing to consider these as non-comparable (:issue:`62157`) - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`) - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`) +- Bug in :meth:`Index.value_counts` not retaining the frequency of a :class:`DatetimeIndex` or :class:`TimedeltaIndex` when called with ``sort=False`` (:issue:`33830`) - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) Timedelta diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 03127e58a9f41..b977e998b82a4 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -868,8 +868,10 @@ def value_counts_internal( dropna: bool = True, ) -> Series: from pandas import ( + DatetimeIndex, Index, Series, + TimedeltaIndex, ) index_name = getattr(values, "name", None) @@ -934,6 +936,17 @@ def value_counts_internal( # Starting in 3.0, we no longer perform dtype inference on the # Index object we construct here, xref GH#56161 idx = Index(keys, dtype=keys.dtype, name=index_name) + + if ( + bins is None + and not sort + and isinstance(values, (DatetimeIndex, TimedeltaIndex)) + and idx.equals(values) + and values.inferred_freq is not None + ): + # Preserve freq of original index + idx.freq = values.inferred_freq # type: ignore[attr-defined] + result = Series(counts, index=idx, name=name, 
@pytest.mark.parametrize(
    ("index", "expected_index", "expected_freq"),
    [
        # Regular, duplicate-free input: the result index equals the input
        # and a frequency can be inferred, so the frequency must be retained.
        pytest.param(
            pd.date_range("2016-01-01", periods=5, freq="D"),
            pd.date_range("2016-01-01", periods=5, freq="D"),
            "D",
            id="datetime-regular",
        ),
        pytest.param(
            pd.timedelta_range(Timedelta(0), periods=5, freq="h"),
            pd.timedelta_range(Timedelta(0), periods=5, freq="h"),
            "h",
            id="timedelta-regular",
        ),
        # Duplicated value: the de-duplicated result no longer equals the
        # input, so no frequency is attached to it. The freq carried by the
        # range-built expected index is not looked at by assert_index_equal;
        # ``expected_freq`` is what pins the expectation.
        pytest.param(
            DatetimeIndex(
                [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1)]
                + [Timestamp("2016-01-02")]
                + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1, 5)]
            ),
            DatetimeIndex(pd.date_range("2016-01-01", periods=5, freq="D")),
            None,
            id="datetime-duplicates",
        ),
        pytest.param(
            TimedeltaIndex(
                [Timedelta(hours=i) for i in range(1)]
                + [Timedelta(hours=1)]
                + [Timedelta(hours=i) for i in range(1, 5)],
            ),
            TimedeltaIndex(pd.timedelta_range(Timedelta(0), periods=5, freq="h")),
            None,
            id="timedelta-duplicates",
        ),
        # Gap in the sequence: no frequency can be inferred from the input.
        pytest.param(
            DatetimeIndex(
                [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(2)]
                + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(3, 5)],
            ),
            DatetimeIndex(
                [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(2)]
                + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(3, 5)],
            ),
            None,
            id="datetime-gap",
        ),
        pytest.param(
            TimedeltaIndex(
                [Timedelta(hours=i) for i in range(2)]
                + [Timedelta(hours=i) for i in range(3, 5)],
            ),
            TimedeltaIndex(
                [Timedelta(hours=i) for i in range(2)]
                + [Timedelta(hours=i) for i in range(3, 5)],
            ),
            None,
            id="timedelta-gap",
        ),
        # NaT in the input (kept because dropna=False): no inferable frequency.
        pytest.param(
            DatetimeIndex(
                [Timestamp("2016-01-01")]
                + [pd.NaT]
                + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1, 5)],
            ),
            DatetimeIndex(
                [Timestamp("2016-01-01")]
                + [pd.NaT]
                + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1, 5)],
            ),
            None,
            id="datetime-nat",
        ),
        pytest.param(
            TimedeltaIndex(
                [Timedelta(hours=0)]
                + [pd.NaT]
                + [Timedelta(hours=i) for i in range(1, 5)],
            ),
            TimedeltaIndex(
                [Timedelta(hours=0)]
                + [pd.NaT]
                + [Timedelta(hours=i) for i in range(1, 5)],
            ),
            None,
            id="timedelta-nat",
        ),
    ],
)
def test_value_counts_index_datetimelike(index, expected_index, expected_freq):
    """Check the index (values and freq) of an unsorted ``value_counts`` result.

    GH#33830: for a DatetimeIndex/TimedeltaIndex, ``value_counts(sort=False)``
    should keep the frequency when the result index equals the input and a
    frequency can be inferred, and should carry no frequency otherwise.
    """
    vc = index.value_counts(sort=False, dropna=False)
    tm.assert_index_equal(vc.index, expected_index)
    # assert_index_equal does not compare ``freq``, so check it explicitly;
    # without this the test cannot detect a lost or wrongly attached freq.
    assert vc.index.freq == expected_freq