From a0b6df664da3f30abe82328f80c2c52a1c8f6108 Mon Sep 17 00:00:00 2001
From: Leif Walsh
Date: Fri, 18 Oct 2019 21:42:10 -0400
Subject: [PATCH] replace assign with []= syntax to reduce copying

pandas's df.assign(...) copies the entire dataframe, while using
df[col] = ... syntax avoids copying everything. This reduces memory
overhead significantly in calls to TimeSeriesDataFrame.toPandas().
---
 python/ts/flint/dataframe.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/ts/flint/dataframe.py b/python/ts/flint/dataframe.py
index f1df361b..73ff019c 100644
--- a/python/ts/flint/dataframe.py
+++ b/python/ts/flint/dataframe.py
@@ -1616,7 +1616,7 @@ def toPandas(self):
             series = pd.to_datetime(pdf['time'])
         except:
             series = pdf['time']
-        pdf = pdf.assign(time=series)
+        pdf['time'] = series
 
         if not self._is_sorted:
             pdf = pdf.sort_values(by='time').reset_index(drop=True)
@@ -1642,6 +1642,7 @@ def preview(self, n=10):
         :param n: number of rows to return. Default is 10.
         """
         df = pd.DataFrame(self.head(n), columns=self.columns)
-        return df.assign(**{self._time_column: pd.to_datetime(df[self._time_column])})
+        df[self._time_column] = pd.to_datetime(df[self._time_column])
+        return df
 
 TimeSeriesDataFrame._override_df_methods()