From 425643a0fe36e70345ac6c62a4d50ea86502a2c6 Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Thu, 7 Nov 2024 14:55:57 +0530 Subject: [PATCH 01/12] feat: add pandas_utilis --- .../create_write_data_from_pandas.py | 23 ++ .../clients/dataframe/models/_data_frame.py | 15 +- .../dataframe/pandas_utils/__init__.py | 3 + .../clients/dataframe/pandas_utils/_helper.py | 128 +++++++++ .../_pandas_dataframe_operations.py | 84 ++++++ .../pandas_utils/_pandas_exception.py | 27 ++ poetry.lock | 254 +++++++++++++++++- pyproject.toml | 1 + 8 files changed, 527 insertions(+), 8 deletions(-) create mode 100644 examples/dataframe/create_write_data_from_pandas.py create mode 100644 nisystemlink/clients/dataframe/pandas_utils/__init__.py create mode 100644 nisystemlink/clients/dataframe/pandas_utils/_helper.py create mode 100644 nisystemlink/clients/dataframe/pandas_utils/_pandas_dataframe_operations.py create mode 100644 nisystemlink/clients/dataframe/pandas_utils/_pandas_exception.py diff --git a/examples/dataframe/create_write_data_from_pandas.py b/examples/dataframe/create_write_data_from_pandas.py new file mode 100644 index 00000000..be75dd46 --- /dev/null +++ b/examples/dataframe/create_write_data_from_pandas.py @@ -0,0 +1,23 @@ +import pandas as pd + +from nisystemlink.clients.dataframe.pandas_utils import ( + append_pandas_df, + create_table_from_pandas_df, +) +from nisystemlink.clients.dataframe.pandas_utils import ( + InvalidIndexError, + InvalidColumnTypeError, +) + +from nisystemlink.clients.dataframe import DataFrameClient + +client = DataFrameClient() + +# df = pd.read_csv("data/data.csv") + +# try: +# table_id = create_table_from_pandas_df(client, df, "Example Table") +# except InvalidColumnTypeError or InvalidIndexError as e: +# print(e) + +# append_pandas_df(table_id, df) diff --git a/nisystemlink/clients/dataframe/models/_data_frame.py b/nisystemlink/clients/dataframe/models/_data_frame.py index c34c41fc..3fbd22a6 100644 --- 
def from_pandas(self, df: pd.DataFrame) -> None:
    """Populate ``columns`` and ``data`` from a pandas DataFrame.

    A named index is emitted as the first column so that it is not lost;
    an unnamed index has no column name to map to and is left out. Every
    non-null cell is converted with ``str`` and missing values (``None``,
    ``NaN``, ``NaT``) become ``None`` so that ``data`` matches its declared
    ``List[List[Optional[str]]]`` type.

    Args:
        df: Pandas DataFrame to convert.

    Raises:
        ValueError: Propagated from ``DataFrame.reset_index`` when the index
            name collides with an existing column name.
    """

    def _as_cell(value: object) -> Optional[str]:
        # ``is_scalar`` guards ``pd.isna`` against list-like cells, for which
        # it would return an array instead of a single boolean.
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            return None
        # NOTE(review): timestamps are rendered with ``str``; confirm the
        # service accepts that textual form.
        return str(value)

    frame = df.reset_index() if df.index.name is not None else df
    self.columns = [str(name) for name in frame.columns]
    self.data = [
        [_as_cell(value) for value in row]
        for row in frame.itertuples(index=False, name=None)
    ]


def to_pandas(self, index: Optional[str] = None) -> pd.DataFrame:
    """Build a pandas DataFrame from ``columns`` and ``data``.

    Args:
        index: Name of the column to use as the DataFrame index. When it is
            ``None`` or empty the default positional index is kept.

    Returns:
        The data as a pandas DataFrame, indexed by ``index`` when given.
    """
    df = pd.DataFrame(data=self.data, columns=self.columns)
    if index:
        df = df.set_index(index)
    return df
# Pandas dtype names that have no direct table data type and are widened.
UNSUPPORTED_INT_TYPES = ["int8", "int16"]
UNSUPPORTED_FLOAT_TYPES = ["float16"]
# Data types accepted for the index column.
SUPPORTED_INDEX_TYPE = [DataType.Int32, DataType.Int64, DataType.Timestamp]
# Keyed by the *string name* of the pandas dtype.
PANDAS_TO_DATATYPE_MAPPING = {
    "bool": DataType.Bool,
    "int32": DataType.Int32,
    "int64": DataType.Int64,
    "float32": DataType.Float32,
    "float64": DataType.Float64,
    "object": DataType.String,
    "datetime64[ns]": DataType.Timestamp,
}


def _pandas_dtype_to_data_type(dtype: object) -> Optional[DataType]:
    """Convert a pandas data type to `DataType`.

    Args:
        dtype: Pandas data type, given either as a dtype object or as its
            string name.

    Returns:
        Optional[DataType]: `DataType` or `None` if no match is found.
    """
    # The mapping is keyed by name. A dtype object compares equal to its name
    # but does not hash like it, so it must be converted before the lookup.
    return PANDAS_TO_DATATYPE_MAPPING.get(str(dtype))


def _type_cast_column_datatype(
    data: Union[pd.Index, pd.Series]
) -> Union[pd.Index, pd.Series]:
    """Convert data to a supported numeric type if necessary.

    Unsigned integers are downcast to a signed integer type via
    ``pd.to_numeric``; ``int8``/``int16`` are widened to ``int32`` and
    ``float16`` to ``float32``. Any other dtype is returned unchanged.

    Args:
        data (Union[pd.Index, pd.Series]): Data to be processed.

    Returns:
        Union[pd.Index, pd.Series]: Processed data.
    """
    if pd.api.types.is_unsigned_integer_dtype(data):
        data = pd.to_numeric(data, downcast="integer")

    dtype_name = str(data.dtype)
    if dtype_name in UNSUPPORTED_INT_TYPES:
        return data.astype("int32")
    if dtype_name in UNSUPPORTED_FLOAT_TYPES:
        return data.astype("float32")
    return data


def _infer_index_column(df: pd.DataFrame) -> Column:
    """Infer the index column for table creation.

    The index of `df` is replaced in place when its dtype has to be cast to a
    supported numeric type.

    Args:
        df (pd.DataFrame): Pandas Dataframe.

    Raises:
        InvalidIndexError: If the index is unnamed or its type is not one of
            `SUPPORTED_INDEX_TYPE`.

    Returns:
        Column: Valid index `Column` for the table.
    """
    index = df.index.name
    if not index:
        raise InvalidIndexError(index_name=index)

    # Read the dtype before testing it; it is re-read after a cast.
    pd_dtype = df.index.dtype
    if (
        pd.api.types.is_any_real_numeric_dtype(pd_dtype)
        and str(pd_dtype) not in PANDAS_TO_DATATYPE_MAPPING
    ):
        df.index = _type_cast_column_datatype(df.index)
        pd_dtype = df.index.dtype

    data_type = _pandas_dtype_to_data_type(pd_dtype)
    if data_type not in SUPPORTED_INDEX_TYPE:
        raise InvalidIndexError(index_name=index)
    return Column(name=index, data_type=data_type, column_type=ColumnType.Index)


def _infer_dataframe_columns(
    df: pd.DataFrame, nullable_columns: bool
) -> List[Column]:
    """Infer the columns for table creation.

    Columns of `df` are replaced in place when their dtype has to be cast to a
    supported numeric type.

    Args:
        df (pd.DataFrame): Pandas Dataframe.
        nullable_columns (bool): Make the columns nullable.

    Raises:
        InvalidColumnTypeError: If a column's type is invalid.

    Returns:
        List[Column]: Columns for the table, in the order of `df.columns`.
    """
    column_type = ColumnType.Nullable if nullable_columns else ColumnType.Normal

    columns = []
    for column_name in df.columns:
        pd_dtype = df[column_name].dtype
        if (
            pd.api.types.is_any_real_numeric_dtype(pd_dtype)
            and str(pd_dtype) not in PANDAS_TO_DATATYPE_MAPPING
        ):
            df[column_name] = _type_cast_column_datatype(df[column_name])
            pd_dtype = df[column_name].dtype

        data_type = _pandas_dtype_to_data_type(pd_dtype)
        if data_type is None:
            raise InvalidColumnTypeError(column_name, str(pd_dtype))
        columns.append(
            Column(name=column_name, data_type=data_type, column_type=column_type)
        )
    return columns
def create_table_from_pandas_df(
    client: DataFrameClient, df: pd.DataFrame, table_name: str, nullable_columns: bool
) -> str:
    """Create a table from a pandas DataFrame.

    The index column inferred from `df` comes first, followed by one column
    per DataFrame column.

    Args:
        client (DataFrameClient): Instance of DataFrameClient.
        df (pd.DataFrame): Pandas dataframe.
        table_name (str): Name of the table.
        nullable_columns (bool): Make the columns nullable.

    Returns:
        str: ID of the table.
    """
    table_columns = [_infer_index_column(df)]
    table_columns += _infer_dataframe_columns(df, nullable_columns)
    return client.create_table(
        CreateTableRequest(name=table_name, columns=table_columns)
    )


def append_pandas_df(
    client: DataFrameClient,
    table_id: str,
    df: pd.DataFrame,
    *,
    end_of_data: bool = True,
) -> None:
    """Append `df` to table.

    Args:
        client: Instance of DataFrameClient.
        table_id: ID of the table.
        df: Pandas DataFrame containing the data to append.
        end_of_data: Value forwarded as the request's ``endOfData`` flag.
            Defaults to ``True``, the value that was previously hard-coded.

    Returns:
        None
    """
    frame = DataFrame()
    frame.from_pandas(df)
    client.append_table_data(
        table_id, data=AppendTableDataRequest(frame=frame, endOfData=end_of_data)
    )


def query_decimated_pandas_df(
    client: DataFrameClient,
    table_id: str,
    request: QueryDecimatedDataRequest,
    index: bool,
) -> pd.DataFrame:
    """Query data from the table.

    Args:
        client (DataFrameClient): Instance of DataFrameClient.
        table_id (str): ID of the table.
        request (QueryDecimatedDataRequest): Request to query decimated data.
            When `index` is true and the request lists explicit columns, the
            index column name is appended to that list in place if missing.
        index (bool): Use the table's index column as the index of the
            returned DataFrame.

    Returns:
        pd.DataFrame: Data in pandas dataframe.
    """
    index_name: Optional[str] = None
    if index:
        columns = client.get_table_metadata(table_id).columns
        index_name = next(
            (
                column.name
                for column in columns
                if column.column_type == ColumnType.Index
            ),
            None,
        )
        # Only extend an explicit column selection, and never add a duplicate.
        # NOTE(review): a request without a column list is presumed to return
        # every column, index included - confirm against the service.
        if (
            index_name is not None
            and request is not None
            and request.columns is not None
            and index_name not in request.columns
        ):
            request.columns.append(index_name)
    response = client.query_decimated_data(table_id, request)
    return response.frame.to_pandas(index_name)
+ super().__init__(self.message) + + +class InvalidColumnTypeError(DataFrameError): + """Raised when a column has an unsupported data type.""" + + def __init__(self, column_name: str, column_type: str) -> None: + self.column_name = column_name + self.column_type = column_type + self.message = ( + f"Column '{column_name}' has an unsupported datatype: {column_type}" + ) + super().__init__(self.message) diff --git a/poetry.lock b/poetry.lock index ce44e313..8d5d51b6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -445,6 +445,124 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "numpy" +version = "2.0.2" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, + {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, + {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66"}, + {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b"}, + {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd"}, + {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318"}, + {file = "numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8"}, + {file = "numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326"}, + {file = "numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97"}, + {file = "numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131"}, + {file = "numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448"}, + {file = "numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195"}, + {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57"}, + {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a"}, + {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669"}, + {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951"}, + {file = "numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9"}, + {file = "numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15"}, + {file = "numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4"}, + {file = "numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc"}, + {file = "numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b"}, + {file = 
"numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e"}, + {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c"}, + {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c"}, + {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692"}, + {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a"}, + {file = "numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c"}, + {file = "numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded"}, + {file = "numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5"}, + {file = "numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a"}, + {file = "numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c"}, + {file = "numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd"}, + {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b"}, + {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729"}, + {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1"}, + {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd"}, + {file = "numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d"}, + {file = "numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d"}, + {file = "numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa"}, + {file = "numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73"}, + {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8"}, + {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4"}, + {file = "numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c"}, + {file = "numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385"}, + {file = "numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78"}, +] + +[[package]] +name = "numpy" +version = "2.1.3" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.10" +files = [ + {file = "numpy-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c894b4305373b9c5576d7a12b473702afdf48ce5369c074ba304cc5ad8730dff"}, + {file = "numpy-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b47fbb433d3260adcd51eb54f92a2ffbc90a4595f8970ee00e064c644ac788f5"}, + {file = 
"numpy-2.1.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:825656d0743699c529c5943554d223c021ff0494ff1442152ce887ef4f7561a1"}, + {file = "numpy-2.1.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:6a4825252fcc430a182ac4dee5a505053d262c807f8a924603d411f6718b88fd"}, + {file = "numpy-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e711e02f49e176a01d0349d82cb5f05ba4db7d5e7e0defd026328e5cfb3226d3"}, + {file = "numpy-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78574ac2d1a4a02421f25da9559850d59457bac82f2b8d7a44fe83a64f770098"}, + {file = "numpy-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c7662f0e3673fe4e832fe07b65c50342ea27d989f92c80355658c7f888fcc83c"}, + {file = "numpy-2.1.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:fa2d1337dc61c8dc417fbccf20f6d1e139896a30721b7f1e832b2bb6ef4eb6c4"}, + {file = "numpy-2.1.3-cp310-cp310-win32.whl", hash = "sha256:72dcc4a35a8515d83e76b58fdf8113a5c969ccd505c8a946759b24e3182d1f23"}, + {file = "numpy-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:ecc76a9ba2911d8d37ac01de72834d8849e55473457558e12995f4cd53e778e0"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d1167c53b93f1f5d8a139a742b3c6f4d429b54e74e6b57d0eff40045187b15d"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c80e4a09b3d95b4e1cac08643f1152fa71a0a821a2d4277334c88d54b2219a41"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:576a1c1d25e9e02ed7fa5477f30a127fe56debd53b8d2c89d5578f9857d03ca9"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:973faafebaae4c0aaa1a1ca1ce02434554d67e628b8d805e61f874b84e136b09"}, + {file = "numpy-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:762479be47a4863e261a840e8e01608d124ee1361e48b96916f38b119cfda04a"}, + {file = "numpy-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bc6f24b3d1ecc1eebfbf5d6051faa49af40b03be1aaa781ebdadcbc090b4539b"}, + {file = "numpy-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:17ee83a1f4fef3c94d16dc1802b998668b5419362c8a4f4e8a491de1b41cc3ee"}, + {file = "numpy-2.1.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:15cb89f39fa6d0bdfb600ea24b250e5f1a3df23f901f51c8debaa6a5d122b2f0"}, + {file = "numpy-2.1.3-cp311-cp311-win32.whl", hash = "sha256:d9beb777a78c331580705326d2367488d5bc473b49a9bc3036c154832520aca9"}, + {file = "numpy-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:d89dd2b6da69c4fff5e39c28a382199ddedc3a5be5390115608345dec660b9e2"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564"}, + {file = "numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512"}, + {file = "numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b"}, + {file = "numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc"}, + {file = "numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0"}, + {file = "numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9"}, + {file = 
"numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96fe52fcdb9345b7cd82ecd34547fca4321f7656d500eca497eb7ea5a926692f"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f653490b33e9c3a4c1c01d41bc2aef08f9475af51146e4a7710c450cf9761598"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dc258a761a16daa791081d026f0ed4399b582712e6fc887a95af09df10c5ca57"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:016d0f6f5e77b0f0d45d77387ffa4bb89816b57c835580c3ce8e099ef830befe"}, + {file = "numpy-2.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c181ba05ce8299c7aa3125c27b9c2167bca4a4445b7ce73d5febc411ca692e43"}, + {file = "numpy-2.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5641516794ca9e5f8a4d17bb45446998c6554704d888f86df9b200e66bdcce56"}, + {file = "numpy-2.1.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ea4dedd6e394a9c180b33c2c872b92f7ce0f8e7ad93e9585312b0c5a04777a4a"}, + {file = "numpy-2.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0df3635b9c8ef48bd3be5f862cf71b0a4716fa0e702155c45067c6b711ddcef"}, + {file = "numpy-2.1.3-cp313-cp313-win32.whl", hash = "sha256:50ca6aba6e163363f132b5c101ba078b8cbd3fa92c7865fd7d4d62d9779ac29f"}, + {file = "numpy-2.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:747641635d3d44bcb380d950679462fae44f54b131be347d5ec2bce47d3df9ed"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:996bb9399059c5b82f76b53ff8bb686069c05acc94656bb259b1d63d04a9506f"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:45966d859916ad02b779706bb43b954281db43e185015df6eb3323120188f9e4"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = 
"sha256:baed7e8d7481bfe0874b566850cb0b85243e982388b7b23348c6db2ee2b2ae8e"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f7f672a3388133335589cfca93ed468509cb7b93ba3105fce780d04a6576a0"}, + {file = "numpy-2.1.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7aac50327da5d208db2eec22eb11e491e3fe13d22653dce51b0f4109101b408"}, + {file = "numpy-2.1.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4394bc0dbd074b7f9b52024832d16e019decebf86caf909d94f6b3f77a8ee3b6"}, + {file = "numpy-2.1.3-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:50d18c4358a0a8a53f12a8ba9d772ab2d460321e6a93d6064fc22443d189853f"}, + {file = "numpy-2.1.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:14e253bd43fc6b37af4921b10f6add6925878a42a0c5fe83daee390bca80bc17"}, + {file = "numpy-2.1.3-cp313-cp313t-win32.whl", hash = "sha256:08788d27a5fd867a663f6fc753fd7c3ad7e92747efc73c53bca2f19f8bc06f48"}, + {file = "numpy-2.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2564fbdf2b99b3f815f2107c1bbc93e2de8ee655a69c261363a1172a79a257d4"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4f2015dfe437dfebbfce7c85c7b53d81ba49e71ba7eadbf1df40c915af75979f"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:3522b0dfe983a575e6a9ab3a4a4dfe156c3e428468ff08ce582b9bb6bd1d71d4"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c006b607a865b07cd981ccb218a04fc86b600411d83d6fc261357f1c0966755d"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e14e26956e6f1696070788252dcdff11b4aca4c3e8bd166e0df1bb8f315a67cb"}, + {file = "numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761"}, +] + [[package]] name = "packaging" version = "24.1" @@ -456,6 +574,92 @@ files = [ {file = "packaging-24.1.tar.gz", hash = 
"sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] +[[package]] +name = "pandas" +version = "2.2.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, + {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, + {file = 
"pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, + {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, + {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, + {file = 
"pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, + {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, + {file = 
"pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, + {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, + {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = 
["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + [[package]] name = "pastel" version = "0.2.1" @@ -665,6 +869,31 @@ pytest = ">=6.1.0" docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2024.2" 
+description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, + {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -787,23 +1016,23 @@ idna2008 = ["idna"] [[package]] name = "setuptools" -version = "75.2.0" +version = "75.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8"}, - {file = "setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec"}, + {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, + {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker 
(>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] [[package]] name = "six" @@ -907,6 +1136,17 @@ files = [ {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +[[package]] +name = "tzdata" +version = "2024.2" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, + {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, +] + [[package]] name = "uplink" version = "0.9.7" @@ -962,4 +1202,4 @@ zstd = ["zstandard (>=0.18.0)"] 
[metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "3fc729acc4723542a580a33f13ee63b048dcc0942af38578e9df8db486773ac3" +content-hash = "4f7e1493ccefc7ab65c754dcad307b271eb64d2e33604eb1f9927f798b894fe0" diff --git a/pyproject.toml b/pyproject.toml index 34eac08d..474c249a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ requests = "^2.28.1" uplink = "^0.9.7" pydantic = "^1.10.2" pyyaml = "^6.0.1" +pandas = "^2.2.3" [tool.poetry.group.dev.dependencies] black = ">=22.10,<25.0" From 24ed4a6904771558ef1bf90eab004b2f7828e89a Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Fri, 8 Nov 2024 14:32:29 +0530 Subject: [PATCH 02/12] feat: add paginated query logic --- .../create_write_data_from_pandas.py | 16 +-- .../clients/dataframe/models/_data_frame.py | 30 +++-- .../dataframe/pandas_utils/__init__.py | 3 - .../_pandas_dataframe_operations.py | 84 ------------ .../clients/dataframe/utilities/__init__.py | 7 + .../utilities/_pandas_dataframe_operations.py | 124 ++++++++++++++++++ .../_pandas_exception.py | 0 .../_helper.py => utilities/_pandas_utils.py} | 46 ++++--- 8 files changed, 190 insertions(+), 120 deletions(-) delete mode 100644 nisystemlink/clients/dataframe/pandas_utils/__init__.py delete mode 100644 nisystemlink/clients/dataframe/pandas_utils/_pandas_dataframe_operations.py create mode 100644 nisystemlink/clients/dataframe/utilities/__init__.py create mode 100644 nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py rename nisystemlink/clients/dataframe/{pandas_utils => utilities}/_pandas_exception.py (100%) rename nisystemlink/clients/dataframe/{pandas_utils/_helper.py => utilities/_pandas_utils.py} (73%) diff --git a/examples/dataframe/create_write_data_from_pandas.py b/examples/dataframe/create_write_data_from_pandas.py index be75dd46..7ffea81e 100644 --- a/examples/dataframe/create_write_data_from_pandas.py +++ b/examples/dataframe/create_write_data_from_pandas.py @@ -1,10 +1,10 @@ import pandas as pd 
-from nisystemlink.clients.dataframe.pandas_utils import ( +from nisystemlink.clients.dataframe.utilities import ( append_pandas_df, create_table_from_pandas_df, ) -from nisystemlink.clients.dataframe.pandas_utils import ( +from nisystemlink.clients.dataframe.utilities import ( InvalidIndexError, InvalidColumnTypeError, ) @@ -13,11 +13,11 @@ client = DataFrameClient() -# df = pd.read_csv("data/data.csv") +df = pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]], columns=["a","b","c"]) -# try: -# table_id = create_table_from_pandas_df(client, df, "Example Table") -# except InvalidColumnTypeError or InvalidIndexError as e: -# print(e) +try: + table_id = create_table_from_pandas_df(client, df, "Example Table") +except InvalidColumnTypeError or InvalidIndexError as e: + print(e) -# append_pandas_df(table_id, df) +append_pandas_df(table_id, df) diff --git a/nisystemlink/clients/dataframe/models/_data_frame.py b/nisystemlink/clients/dataframe/models/_data_frame.py index 3fbd22a6..8e158e6f 100644 --- a/nisystemlink/clients/dataframe/models/_data_frame.py +++ b/nisystemlink/clients/dataframe/models/_data_frame.py @@ -58,13 +58,25 @@ class DataFrame(JsonModel): """The data for each row with the order specified in the columns property. Must contain a value for each column in the columns property.""" - def from_pandas(self, df: pd.DataFrame): - self.columns = df.columns.tolist() - self.data = df.values.tolist() - + def from_pandas(self, df: pd.DataFrame) -> None: + """Convert pandas dataframe to `DataFrame`. - def to_pandas(self, index: Optional[str]) -> pd.DataFrame: - df = pd.DataFrame(data=self.data, columns=self.columns) - if index: - df = df.set_index(index) - return df + Args: + df (pd.DataFrame): Pandas dataframe. + """ + self.columns = df.columns.tolist() + self.data = df.values.tolist() + + def to_pandas(self, index: Optional[str] = None) -> pd.DataFrame: + """Convert `DataFrame` to pandas dataframe. + + Args: + index (Optional[str]): Column to set as index. 
+ + Returns: + pd.DataFrame: Converted pandas dataframe. + """ + df = pd.DataFrame(data=self.data, columns=self.columns) + if index: + df.set_index(index, inplace=True) + return df diff --git a/nisystemlink/clients/dataframe/pandas_utils/__init__.py b/nisystemlink/clients/dataframe/pandas_utils/__init__.py deleted file mode 100644 index ce5f1ec6..00000000 --- a/nisystemlink/clients/dataframe/pandas_utils/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from ._pandas_exception import InvalidColumnTypeError, InvalidIndexError -from ._helper import _process_column_data,_infer_dataframe_columns, _infer_index_column, _pandas_dtype_to_data_type -from ._pandas_dataframe_operations import create_table_from_pandas_df, query_decimated_pandas_df, append_pandas_df \ No newline at end of file diff --git a/nisystemlink/clients/dataframe/pandas_utils/_pandas_dataframe_operations.py b/nisystemlink/clients/dataframe/pandas_utils/_pandas_dataframe_operations.py deleted file mode 100644 index 0fecce9a..00000000 --- a/nisystemlink/clients/dataframe/pandas_utils/_pandas_dataframe_operations.py +++ /dev/null @@ -1,84 +0,0 @@ -from typing import List, Optional, Union, Tuple - -import pandas as pd - -from ._helper import _infer_index_column, _infer_dataframe_columns -from nisystemlink.clients.dataframe import DataFrameClient -from nisystemlink.clients.dataframe.models import ( - AppendTableDataRequest, - ColumnType, - CreateTableRequest, - DataFrame, - QueryDecimatedDataRequest, -) -def create_table_from_pandas_df( - client: DataFrameClient, df: pd.DataFrame, table_name: str, nullable_columns: bool -) -> str: - """Create a table from a pandas DataFrame. - - Args: - client (DataFrameClient): Instance of DataFrameClient. - df (pd.DataFrame): Pandas dataframe. - table_name (str): Name of the table. - nullable_columns (bool): Make the columns nullable. - - Returns: - str: ID of the table. 
- """ - index = _infer_index_column(df) - table_columns = [index] - - dataframe_columns = _infer_dataframe_columns(df, nullable_columns) - table_columns += dataframe_columns - - table_id = client.create_table( - CreateTableRequest(name=table_name, columns=table_columns) - ) - return table_id - - -def append_pandas_df(client: DataFrameClient, table_id: str, df: pd.DataFrame) -> None: - """Append `df` to table. - - Args: - client: Instance of DataFrameClient. - table_id: ID of the table. - df: Pandas DataFrame containing the data to append. - - Returns: - None - """ - frame = DataFrame() - frame.from_pandas(df) - client.append_table_data( - table_id, data=AppendTableDataRequest(frame=frame, endOfData=True) - ) - - -def query_decimated_pandas_df( - client: DataFrameClient, - table_id: str, - request: QueryDecimatedDataRequest, - index: bool, -) -> pd.DataFrame: - """Query data from the table. - - Args: - client (DataFrameClient): Instance of DataFrameClient. - table_id (str): ID of the table. - request (QueryDecimatedDataRequest): Request to query decimated data. - - Returns: - pd.DataFrame: Data in pandas dataframe. 
- """ - index_name: str = None - if index: - columns = client.get_table_metadata(table_id).columns - for column in columns: - if column.column_type == ColumnType.Index: - index_name = column.name - if request: - request.columns.append(index_name) - break - response = client.query_decimated_data(table_id, request) - return response.frame.to_pandas(index_name) \ No newline at end of file diff --git a/nisystemlink/clients/dataframe/utilities/__init__.py b/nisystemlink/clients/dataframe/utilities/__init__.py new file mode 100644 index 00000000..46c742b2 --- /dev/null +++ b/nisystemlink/clients/dataframe/utilities/__init__.py @@ -0,0 +1,7 @@ +from ._pandas_exception import InvalidColumnTypeError, InvalidIndexError +from ._pandas_dataframe_operations import ( + create_table_from_pandas_df, + append_pandas_df_to_table, + query_decimated_table_data_as_pandas_df, + query_table_data_as_pandas_df +) diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py new file mode 100644 index 00000000..cd77cff6 --- /dev/null +++ b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py @@ -0,0 +1,124 @@ +import pandas as pd + +from ._pandas_utils import ( + _infer_index_column, + _infer_dataframe_columns, + _get_table_index_name, +) +from nisystemlink.clients.dataframe import DataFrameClient +from nisystemlink.clients.dataframe.models import ( + AppendTableDataRequest, + CreateTableRequest, + DataFrame, + QueryTableDataRequest, + QueryDecimatedDataRequest, +) + + +def create_table_from_pandas_df( + client: DataFrameClient, df: pd.DataFrame, table_name: str, nullable_columns: bool +) -> str: + """Create a table from a pandas DataFrame. + + Args: + client (DataFrameClient): Instance of DataFrameClient. + df (pd.DataFrame): Pandas dataframe. + table_name (str): Name of the table. + nullable_columns (bool): Make the columns nullable. + + Returns: + str: ID of the table. 
+ """ + index = _infer_index_column(df) + table_columns = [index] + + dataframe_columns = _infer_dataframe_columns(df, nullable_columns) + table_columns += dataframe_columns + + table_id = client.create_table( + CreateTableRequest(name=table_name, columns=table_columns) + ) + return table_id + + +def append_pandas_df_to_table(client: DataFrameClient, table_id: str, df: pd.DataFrame) -> None: + """Append `df` to table. + + Args: + client: Instance of `DataFrameClient`. + table_id: ID of the table. + df: Pandas DataFrame containing the data to append. + + Returns: + None + """ + frame = DataFrame() + frame.from_pandas(df) + client.append_table_data( + table_id, data=AppendTableDataRequest(frame=frame, endOfData=True) + ) + + +def query_decimated_table_data_as_pandas_df( + client: DataFrameClient, + table_id: str, + query: QueryDecimatedDataRequest, + index: bool, +) -> pd.DataFrame: + """Query data from the table. + + Args: + client (DataFrameClient): Instance of DataFrameClient. + table_id (str): ID of the table. + query (QueryDecimatedDataRequest): Request to query decimated data. + index (bool, optional): Whether index column to be included. + + Returns: + pd.DataFrame: Table data in pandas dataframe format. + """ + index_name: str = None + if index: + index_name = _get_table_index_name(client=client, table_id=table_id) + if query and (index_name not in query.columns): + query.columns.append(index_name) + response = client.query_decimated_data(table_id, query) + return response.frame.to_pandas(index_name) + + +def query_table_data_as_pandas_df( + client: DataFrameClient, + table_id: str, + query: QueryTableDataRequest, + index: bool = False, +) -> pd.DataFrame: + """Query data from the table. + + Args: + client (DataFrameClient): Instance of `DataFrameClient`. + table_id (str): ID of the table. + query (QueryTableDataRequest): Request to query data. + index (bool, optional): Whether index column to be included. 
+ + Returns: + pd.DataFrame: Table data in pandas dataframe format. + """ + continuation_token = None + all_rows = [] + index_name: str = None + if index: + index_name = _get_table_index_name(client=client, table_id=table_id) + if query and (index_name not in query.columns): + query.columns.append(index_name) + + while True: + response = client.query_table_data(table_id, query) + + all_rows.append(response.frame.to_pandas(index_name)) + + continuation_token = response.continuation_token + if continuation_token: + query.continuation_token=continuation_token + else: + break + + return pd.concat(all_rows, ignore_index=not(index)) \ No newline at end of file diff --git a/nisystemlink/clients/dataframe/pandas_utils/_pandas_exception.py b/nisystemlink/clients/dataframe/utilities/_pandas_exception.py similarity index 100% rename from nisystemlink/clients/dataframe/pandas_utils/_pandas_exception.py rename to nisystemlink/clients/dataframe/utilities/_pandas_exception.py diff --git a/nisystemlink/clients/dataframe/pandas_utils/_helper.py b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py similarity index 73% rename from nisystemlink/clients/dataframe/pandas_utils/_helper.py rename to nisystemlink/clients/dataframe/utilities/_pandas_utils.py index abfc0b6e..4ecd8b84 100644 --- a/nisystemlink/clients/dataframe/pandas_utils/_helper.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py @@ -2,20 +2,15 @@ import pandas as pd -from ._pandas_exception import ( - InvalidIndexError, - InvalidColumnTypeError, -) -from nisystemlink.clients.dataframe.models import ( - Column, - ColumnType, - DataType, -) +from nisystemlink.clients.dataframe import DataFrameClient +from nisystemlink.clients.dataframe.models import Column, ColumnType, DataType + +from ._pandas_exception import InvalidColumnTypeError, InvalidIndexError UNSUPPORTED_INT_TYPES = ["int8", "int16"] UNSUPPORTED_FLOAT_TYPES = ["float16"] SUPPORTED_INDEX_TYPE = [DataType.Int32, DataType.Int64, 
DataType.Timestamp] -PANDAS_TO_DATATYPE_MAPPING = { +SUPPORTED_DATATYPE_MAPPING = { "bool": DataType.Bool, "int32": DataType.Int32, "int64": DataType.Int64, @@ -35,8 +30,8 @@ def _pandas_dtype_to_data_type(dtype: str) -> Optional[DataType]: Returns: Optional[DataType]: `DataType`or `None` if match not found. """ - if dtype in PANDAS_TO_DATATYPE_MAPPING: - return PANDAS_TO_DATATYPE_MAPPING[dtype] + if dtype in SUPPORTED_DATATYPE_MAPPING: + return SUPPORTED_DATATYPE_MAPPING[dtype] return None @@ -71,7 +66,7 @@ def _infer_index_column(self, df: pd.DataFrame) -> Column: df (pd.DataFrame): Pandas Dataframe. Raises: - InvalidIndexError: If index column is invalid. + InvalidIndexError: If multiple index present or index is of unsupported type. Returns: Column: Valid `Column` to the table. @@ -80,12 +75,14 @@ def _infer_index_column(self, df: pd.DataFrame) -> Column: if not index: raise InvalidIndexError(index_name=index) + pd_dtype = df.index.dtype if ( pd.api.types.is_any_real_numeric_dtype(df.index) - and pd_dtype not in PANDAS_TO_DATATYPE_MAPPING + and pd_dtype not in SUPPORTED_DATATYPE_MAPPING ): df.index = _type_cast_column_datatype(df.index) pd_dtype = df.index.dtype + data_type = _pandas_dtype_to_data_type(pd_dtype) if data_type not in SUPPORTED_INDEX_TYPE: raise InvalidIndexError(index_name=index) @@ -102,7 +99,7 @@ def _infer_dataframe_columns( nullable_columns (bool): Make the columns nullable. Raises: - InvalidColumnTypeError: If a column's type is invalid. + InvalidColumnTypeError: If the column type is unsupported. Returns: List[Column]: Columns to the table. 
@@ -115,7 +112,7 @@ def _infer_dataframe_columns( pd_dtype = df[column_name].dtype if ( pd.api.types.is_any_real_numeric_dtype(pd_dtype) - and pd_dtype not in PANDAS_TO_DATATYPE_MAPPING + and pd_dtype not in SUPPORTED_DATATYPE_MAPPING ): df[column_name] = _type_cast_column_datatype(df[column_name]) pd_dtype = df[column_name].dtype @@ -126,3 +123,20 @@ def _infer_dataframe_columns( Column(name=column_name, data_type=data_type, column_type=column_type) ) return columns + + +def _get_table_index_name(client: DataFrameClient, table_id: str) -> str: + """Get the index name from the table columns. + + Args: + client (DataFrameClient): Instance of the `DataFrameclient`. + table_id (str): ID of the table. + + Returns: + str: Name of the index column + """ + columns = client.get_table_metadata(table_id).columns + for column in columns: + if column.column_type == ColumnType.Index: + return column.name + return None From 8606b508b32da41af79449812b744dec006aeeca Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Mon, 11 Nov 2024 09:51:38 +0530 Subject: [PATCH 03/12] docs: add docstring for constants --- .../create_write_data_from_pandas.py | 23 --------- .../dataframe/pandas_dataframe_operations.py | 50 +++++++++++++++++++ .../utilities/_pandas_dataframe_operations.py | 4 +- .../dataframe/utilities/_pandas_utils.py | 40 ++++++++++----- 4 files changed, 80 insertions(+), 37 deletions(-) delete mode 100644 examples/dataframe/create_write_data_from_pandas.py create mode 100644 examples/dataframe/pandas_dataframe_operations.py diff --git a/examples/dataframe/create_write_data_from_pandas.py b/examples/dataframe/create_write_data_from_pandas.py deleted file mode 100644 index 7ffea81e..00000000 --- a/examples/dataframe/create_write_data_from_pandas.py +++ /dev/null @@ -1,23 +0,0 @@ -import pandas as pd - -from nisystemlink.clients.dataframe.utilities import ( - append_pandas_df, - create_table_from_pandas_df, -) -from nisystemlink.clients.dataframe.utilities import ( - 
InvalidIndexError, - InvalidColumnTypeError, -) - -from nisystemlink.clients.dataframe import DataFrameClient - -client = DataFrameClient() - -df = pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]], columns=["a","b","c"]) - -try: - table_id = create_table_from_pandas_df(client, df, "Example Table") -except InvalidColumnTypeError or InvalidIndexError as e: - print(e) - -append_pandas_df(table_id, df) diff --git a/examples/dataframe/pandas_dataframe_operations.py b/examples/dataframe/pandas_dataframe_operations.py new file mode 100644 index 00000000..cecf5013 --- /dev/null +++ b/examples/dataframe/pandas_dataframe_operations.py @@ -0,0 +1,50 @@ +import pandas as pd + +from nisystemlink.clients.dataframe import DataFrameClient +from nisystemlink.clients.dataframe.models import (DecimationMethod, + DecimationOptions, + QueryDecimatedDataRequest, + QueryTableDataRequest) +from nisystemlink.clients.dataframe.utilities import ( + InvalidColumnTypeError, InvalidIndexError, append_pandas_df_to_table, + create_table_from_pandas_df, query_decimated_table_data_as_pandas_df, query_table_data_as_pandas_df) + +client = DataFrameClient() + +df = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]) +df.set_index("a", inplace=True) + +try: + table_id = create_table_from_pandas_df( + client, df, "Example Table", nullable_columns=False + ) + print(f"Table created with ID: {table_id}") +except (InvalidColumnTypeError, InvalidIndexError) as e: + print(f"Error creating table: {e}") + +new_df = pd.DataFrame(data=[[9, 8, 7], [7, 8, 9]], columns=["a", "b", "c"]) + +append_pandas_df_to_table(client, table_id, new_df) +print("Data appended to the table.") + +request = QueryDecimatedDataRequest( + decimation=DecimationOptions( + x_column="b", + y_columns=["c"], + intervals=1, + method=DecimationMethod.MaxMin, + ) +) + +queried_decimated_df = query_decimated_table_data_as_pandas_df( + client, table_id, query=request, index=False +) +print("Queried decimated data as pandas 
dataframe:") +print(queried_decimated_df) + +query=QueryTableDataRequest(filters=["b","c"], take=3) +queried_df = query_table_data_as_pandas_df(client=client, table_id=table_id, query=query) +print("Queried table data as pandas dataframe:") +print(queried_df) + + diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py index cd77cff6..aabc645f 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py @@ -105,6 +105,7 @@ def query_table_data_as_pandas_df( continuation_token = None all_rows = [] index_name: str = None + if index: index_name = _get_table_index_name(client=client, table_id=table_id) if query and (index_name not in query.columns): @@ -112,10 +113,9 @@ def query_table_data_as_pandas_df( while True: response = client.query_table_data(table_id, query) - all_rows.append(response.frame.to_pandas(index_name)) - continuation_token = response.continuation_token + if continuation_token: query.continuation_token=continuation_token else: diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py index 4ecd8b84..150be36f 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py @@ -4,13 +4,20 @@ from nisystemlink.clients.dataframe import DataFrameClient from nisystemlink.clients.dataframe.models import Column, ColumnType, DataType - from ._pandas_exception import InvalidColumnTypeError, InvalidIndexError -UNSUPPORTED_INT_TYPES = ["int8", "int16"] -UNSUPPORTED_FLOAT_TYPES = ["float16"] -SUPPORTED_INDEX_TYPE = [DataType.Int32, DataType.Int64, DataType.Timestamp] -SUPPORTED_DATATYPE_MAPPING = { +UNSUPPORTED_PANDAS_INT_TYPES = ["int8", "int16"] +"""List of unsupported pandas integer types for conversion to 
`DataType`.""" + +UNSUPPORTED_PANDAS_FLOAT_TYPES = ["float16"] +"""List of unsupported pandas float types for conversion to `DataType`.""" + +SUPPORTED_INDEX_DATA_TYPE = [DataType.Int32, DataType.Int64, DataType.Timestamp] +"""List of supported index data types for table creation. + +Only these `DataType` values are allowed for the index column of the table. +""" +SUPPORTED_PANDAS_DATATYPE_MAPPING = { "bool": DataType.Bool, "int32": DataType.Int32, "int64": DataType.Int64, @@ -19,6 +26,10 @@ "object": DataType.String, "datetime64[ns]": DataType.Timestamp, } +"""Mapping of pandas data types to `DataType`. + +This dictionary maps commonly used pandas data types to the corresponding `DataType` used in table creation. +""" def _pandas_dtype_to_data_type(dtype: str) -> Optional[DataType]: @@ -30,8 +41,8 @@ def _pandas_dtype_to_data_type(dtype: str) -> Optional[DataType]: Returns: Optional[DataType]: `DataType`or `None` if match not found. """ - if dtype in SUPPORTED_DATATYPE_MAPPING: - return SUPPORTED_DATATYPE_MAPPING[dtype] + if dtype in SUPPORTED_PANDAS_DATATYPE_MAPPING: + return SUPPORTED_PANDAS_DATATYPE_MAPPING[dtype] return None @@ -50,10 +61,10 @@ def _type_cast_column_datatype( data = pd.to_numeric(data, downcast="integer") pd_dtype = data.dtype - if pd_dtype in UNSUPPORTED_INT_TYPES: + if pd_dtype in UNSUPPORTED_PANDAS_INT_TYPES: data = data.astype("int32") - elif pd_dtype in UNSUPPORTED_FLOAT_TYPES: + elif pd_dtype in UNSUPPORTED_PANDAS_FLOAT_TYPES: data = data.astype("float32") return data @@ -75,17 +86,20 @@ def _infer_index_column(self, df: pd.DataFrame) -> Column: if not index: raise InvalidIndexError(index_name=index) + pd_dtype = df.index.dtype if ( pd.api.types.is_any_real_numeric_dtype(df.index) - and pd_dtype not in SUPPORTED_DATATYPE_MAPPING + and pd_dtype not in SUPPORTED_PANDAS_DATATYPE_MAPPING ): df.index = _type_cast_column_datatype(df.index) pd_dtype = df.index.dtype data_type = _pandas_dtype_to_data_type(pd_dtype) - if data_type not in 
SUPPORTED_INDEX_TYPE: + + if data_type not in SUPPORTED_INDEX_DATA_TYPE: raise InvalidIndexError(index_name=index) + return Column(name=index, data_type=data_type, column_type=ColumnType.Index) @@ -112,13 +126,15 @@ def _infer_dataframe_columns( pd_dtype = df[column_name].dtype if ( pd.api.types.is_any_real_numeric_dtype(pd_dtype) - and pd_dtype not in SUPPORTED_DATATYPE_MAPPING + and pd_dtype not in SUPPORTED_PANDAS_DATATYPE_MAPPING ): df[column_name] = _type_cast_column_datatype(df[column_name]) pd_dtype = df[column_name].dtype + data_type = _pandas_dtype_to_data_type(pd_dtype) if data_type is None: raise InvalidColumnTypeError(column_name, pd_dtype) + columns.append( Column(name=column_name, data_type=data_type, column_type=column_type) ) From e46b26f852c37face6c2a6b2354c2ac2edcf16b0 Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Mon, 11 Nov 2024 09:58:27 +0530 Subject: [PATCH 04/12] feat: add example --- nisystemlink/clients/dataframe/utilities/_pandas_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py index 150be36f..27576613 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py @@ -113,7 +113,7 @@ def _infer_dataframe_columns( nullable_columns (bool): Make the columns nullable. Raises: - InvalidColumnTypeError: If the column type is unsupported. + InvalidColumnTypeError: If data type of the column is unsupported. Returns: List[Column]: Columns to the table. 
From bb05d3c0e4de363f11160ebfa1f985b26d270fc1 Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Mon, 11 Nov 2024 19:50:35 +0530 Subject: [PATCH 05/12] feat: add test file --- .../dataframe/test_pandas_utility.py | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 tests/integration/dataframe/test_pandas_utility.py diff --git a/tests/integration/dataframe/test_pandas_utility.py b/tests/integration/dataframe/test_pandas_utility.py new file mode 100644 index 00000000..15937d26 --- /dev/null +++ b/tests/integration/dataframe/test_pandas_utility.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +from typing import List, Optional + +import pandas as pd +import pytest # type: ignore + +from nisystemlink.clients.core import ApiException +from nisystemlink.clients.dataframe import DataFrameClient +from nisystemlink.clients.dataframe.models import ( + ColumnOrderBy, + ColumnType, + DecimationMethod, + DecimationOptions, + QueryDecimatedDataRequest, + QueryTableDataRequest, +) +from nisystemlink.clients.dataframe.utilities import ( + InvalidIndexError, + append_pandas_df_to_table, + create_table_from_pandas_df, + query_decimated_table_data_as_pandas_df, + query_table_data_as_pandas_df, +) + + +@pytest.fixture(scope="class") +def client(enterprise_config): + """Fixture to create a DataFrameClient instance.""" + return DataFrameClient(enterprise_config) + + +@pytest.fixture +def sample_dataframe(): + """Fixture for a sample pandas DataFrame.""" + + frame = pd.DataFrame( + columns=["index", "value", "ignore_me"], + data=[["1", "3.3", "True"], ["2", None, "False"], ["3", "1.1", "True"]], + ) + frame.set_index("index", inplace=True) + return frame + + +def test_create_table_from_pandas_df( + client: DataFrameClient, sample_dataframe: pd.DataFrame +): + table_name = "TestTable" + nullable_columns = True + table_id = create_table_from_pandas_df( + client, sample_dataframe, table_name, nullable_columns=nullable_columns + ) + index = None + table_columns = 
client.get_table_metadata(table_id).columns + for column in table_columns: + if column.column_type == ColumnType.Index: + index = column.name + break + + assert table_id is not None + assert index is not None + assert sample_dataframe.index == index + + +def test_append_data__works(client: DataFrameClient, sample_dataframe): + + id = create_table_from_pandas_df( + client, sample_dataframe, table_name="TestTable", nullable_columns=True + ) + + append_pandas_df_to_table(client, table_id=id, df=sample_dataframe) + + response = client.get_table_data(id) + + assert response.total_row_count == 3 + + +def test__write_invalid_data__raises(client: DataFrameClient, sample_dataframe): + id = create_table_from_pandas_df( + client, sample_dataframe, table_name="TestTable", nullable_columns=True + ) + + frame = pd.DataFrame( + columns=["index", "non_existent_column"], + data=[["1", "2"], ["2", "2"], ["3", "3"]], + ) + + with pytest.raises(ApiException, match="400 Bad Request"): + append_pandas_df_to_table(client, table_id=id, df=frame) + + +def test__create_table_with_missing_index__raises(client: DataFrameClient): + + frame = pd.DataFrame( + columns=["index", "value", "ignore_me"], + data=[["1", "3.3", "True"], ["2", None, "False"], ["3", "1.1", "True"]], + ) + + with pytest.raises( + InvalidIndexError, match="Data frame must contain one index." + ) as error: + id = create_table_from_pandas_df( + client, df=frame, table_name="TestTable", nullable_columns=True + ) + assert str(error.value) == "Data frame must contain one index." 
+ + +def test__query_table_data__sorts(self, client: DataFrameClient, sample_dataframe): + table_name = "TestTable" + nullable_columns = True + id = create_table_from_pandas_df( + client, sample_dataframe, table_name, nullable_columns=nullable_columns + ) + + frame = pd.DataFrame( + data=[["1", "2.5", "True"], ["2", "1.5", "False"], ["3", "2.5", "True"]], + columns=["index", "value", "ignore_me"], + ) + + append_pandas_df_to_table(client, table_id=id, df=frame) + + response = query_table_data_as_pandas_df( + client, + table_id=id, + query=QueryTableDataRequest( + order_by=[ + ColumnOrderBy(column="value", descending=True), + ColumnOrderBy(column="ignore_me"), + ], + ), + index=True, + ) + expected_df = pd.DataFrame( + data=[["2", "1.5", "False"], ["3", "2.5", "True"], ["1", "2.5", "True"]], + columns=["index", "value", "ignore_me"], + ) + expected_df.set_index("index", inplace=True) + + assert response == expected_df + + +def test__query_decimated_data__works(client: DataFrameClient, create_table): + table_name = "TestTable" + nullable_columns = True + + frame = pd.DataFrame( + data=[ + ["1", "1.5", "3.5"], + ["2", "2.5", "2.5"], + ["3", "3.5", "1.5"], + ["4", "4.5", "4.5"], + ], + columns=["index", "col1", "col2"], + ) + frame.set_index("index", inplace=True) + id = create_table_from_pandas_df( + client, df=frame, table_name=table_name, nullable_columns=nullable_columns + ) + + append_pandas_df_to_table(client, table_id=id, df=frame) + + response = query_decimated_table_data_as_pandas_df( + client, + table_id=id, + query=QueryDecimatedDataRequest( + decimation=DecimationOptions( + x_column="index", + y_columns=["col1"], + intervals=1, + method=DecimationMethod.MaxMin, + ) + ), + index=True, + ) + + assert response == pd.DataFrame( + data=[["1", "1.5", "3.5"], ["4", "4.5", "4.5"]], + columns=frame.columns, + index=frame.index, + ) From e8cde8d3d82c2e5558a73d455562a63ecb415ba4 Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Tue, 12 Nov 2024 09:29:41 +0530 
Subject: [PATCH 06/12] style: format utilities --- .../dataframe/pandas_dataframe_operations.py | 27 ++++++++++++------- .../clients/dataframe/utilities/__init__.py | 2 +- .../utilities/_pandas_dataframe_operations.py | 12 +++++---- .../dataframe/utilities/_pandas_utils.py | 8 +++--- 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/examples/dataframe/pandas_dataframe_operations.py b/examples/dataframe/pandas_dataframe_operations.py index cecf5013..69c54d8b 100644 --- a/examples/dataframe/pandas_dataframe_operations.py +++ b/examples/dataframe/pandas_dataframe_operations.py @@ -1,13 +1,20 @@ import pandas as pd from nisystemlink.clients.dataframe import DataFrameClient -from nisystemlink.clients.dataframe.models import (DecimationMethod, - DecimationOptions, - QueryDecimatedDataRequest, - QueryTableDataRequest) +from nisystemlink.clients.dataframe.models import ( + DecimationMethod, + DecimationOptions, + QueryDecimatedDataRequest, + QueryTableDataRequest, +) from nisystemlink.clients.dataframe.utilities import ( - InvalidColumnTypeError, InvalidIndexError, append_pandas_df_to_table, - create_table_from_pandas_df, query_decimated_table_data_as_pandas_df, query_table_data_as_pandas_df) + InvalidColumnTypeError, + InvalidIndexError, + append_pandas_df_to_table, + create_table_from_pandas_df, + query_decimated_table_data_as_pandas_df, + query_table_data_as_pandas_df, +) client = DataFrameClient() @@ -42,9 +49,9 @@ print("Queried decimated data as pandas dataframe:") print(queried_decimated_df) -query=QueryTableDataRequest(filters=["b","c"], take=3) -queried_df = query_table_data_as_pandas_df(client=client, table_id=table_id, query=query) +query = QueryTableDataRequest(filters=["b", "c"], take=3) +queried_df = query_table_data_as_pandas_df( + client=client, table_id=table_id, query=query +) print("Queried table data as pandas dataframe:") print(queried_df) - - diff --git a/nisystemlink/clients/dataframe/utilities/__init__.py 
b/nisystemlink/clients/dataframe/utilities/__init__.py index 46c742b2..6189138f 100644 --- a/nisystemlink/clients/dataframe/utilities/__init__.py +++ b/nisystemlink/clients/dataframe/utilities/__init__.py @@ -3,5 +3,5 @@ create_table_from_pandas_df, append_pandas_df_to_table, query_decimated_table_data_as_pandas_df, - query_table_data_as_pandas_df + query_table_data_as_pandas_df, ) diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py index aabc645f..65c0df17 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py @@ -41,7 +41,9 @@ def create_table_from_pandas_df( return table_id -def append_pandas_df_to_table(client: DataFrameClient, table_id: str, df: pd.DataFrame) -> None: +def append_pandas_df_to_table( + client: DataFrameClient, table_id: str, df: pd.DataFrame +) -> None: """Append `df` to table. 
Args: @@ -115,10 +117,10 @@ def query_table_data_as_pandas_df( response = client.query_table_data(table_id, query) all_rows.append(response.frame.to_pandas(index_name)) continuation_token = response.continuation_token - + if continuation_token: - query.continuation_token=continuation_token + query.continuation_token = continuation_token else: break - - return pd.concat(all_rows, ignore_index=not(index)) \ No newline at end of file + + return pd.concat(all_rows, ignore_index=not (index)) diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py index 27576613..40bfeee8 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py @@ -86,7 +86,7 @@ def _infer_index_column(self, df: pd.DataFrame) -> Column: if not index: raise InvalidIndexError(index_name=index) - + pd_dtype = df.index.dtype if ( pd.api.types.is_any_real_numeric_dtype(df.index) @@ -94,12 +94,12 @@ def _infer_index_column(self, df: pd.DataFrame) -> Column: ): df.index = _type_cast_column_datatype(df.index) pd_dtype = df.index.dtype - + data_type = _pandas_dtype_to_data_type(pd_dtype) if data_type not in SUPPORTED_INDEX_DATA_TYPE: raise InvalidIndexError(index_name=index) - + return Column(name=index, data_type=data_type, column_type=ColumnType.Index) @@ -134,7 +134,7 @@ def _infer_dataframe_columns( data_type = _pandas_dtype_to_data_type(pd_dtype) if data_type is None: raise InvalidColumnTypeError(column_name, pd_dtype) - + columns.append( Column(name=column_name, data_type=data_type, column_type=column_type) ) From 2529a9bf144ce661d16e4e57072d12ddf14c51dd Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Tue, 12 Nov 2024 18:11:21 +0530 Subject: [PATCH 07/12] refactor: restructure dataframe conversion --- .../dataframe/pandas_dataframe_operations.py | 24 +- .../clients/dataframe/models/_data_frame.py | 7 +- .../clients/dataframe/utilities/__init__.py | 2 + 
.../utilities/_pandas_dataframe_operations.py | 26 +- .../dataframe/utilities/_pandas_exception.py | 8 +- .../dataframe/utilities/_pandas_utils.py | 17 +- .../dataframe/test_pandas_utility.py | 258 ++++++++++-------- 7 files changed, 181 insertions(+), 161 deletions(-) diff --git a/examples/dataframe/pandas_dataframe_operations.py b/examples/dataframe/pandas_dataframe_operations.py index 69c54d8b..58e606e4 100644 --- a/examples/dataframe/pandas_dataframe_operations.py +++ b/examples/dataframe/pandas_dataframe_operations.py @@ -1,5 +1,4 @@ import pandas as pd - from nisystemlink.clients.dataframe import DataFrameClient from nisystemlink.clients.dataframe.models import ( DecimationMethod, @@ -8,17 +7,18 @@ QueryTableDataRequest, ) from nisystemlink.clients.dataframe.utilities import ( - InvalidColumnTypeError, - InvalidIndexError, append_pandas_df_to_table, create_table_from_pandas_df, + InvalidColumnTypeError, + InvalidIndexError, query_decimated_table_data_as_pandas_df, query_table_data_as_pandas_df, ) client = DataFrameClient() - -df = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]) +df: pd.DataFrame = pd.DataFrame( + data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] +) df.set_index("a", inplace=True) try: @@ -29,29 +29,27 @@ except (InvalidColumnTypeError, InvalidIndexError) as e: print(f"Error creating table: {e}") -new_df = pd.DataFrame(data=[[9, 8, 7], [7, 8, 9]], columns=["a", "b", "c"]) - -append_pandas_df_to_table(client, table_id, new_df) +append_pandas_df_to_table(client, table_id, df) print("Data appended to the table.") request = QueryDecimatedDataRequest( decimation=DecimationOptions( - x_column="b", - y_columns=["c"], + x_column="a", + y_columns=["b"], intervals=1, method=DecimationMethod.MaxMin, ) ) queried_decimated_df = query_decimated_table_data_as_pandas_df( - client, table_id, query=request, index=False + client, table_id, query=request, index=True ) print("Queried decimated data as pandas dataframe:") 
print(queried_decimated_df) -query = QueryTableDataRequest(filters=["b", "c"], take=3) +query = QueryTableDataRequest() queried_df = query_table_data_as_pandas_df( - client=client, table_id=table_id, query=query + client=client, table_id=table_id, query=query, index=True ) print("Queried table data as pandas dataframe:") print(queried_df) diff --git a/nisystemlink/clients/dataframe/models/_data_frame.py b/nisystemlink/clients/dataframe/models/_data_frame.py index 8e158e6f..a257fa26 100644 --- a/nisystemlink/clients/dataframe/models/_data_frame.py +++ b/nisystemlink/clients/dataframe/models/_data_frame.py @@ -1,7 +1,6 @@ from typing import List, Optional import pandas as pd - from nisystemlink.clients.core._uplink._json_model import JsonModel @@ -64,8 +63,10 @@ def from_pandas(self, df: pd.DataFrame) -> None: Args: df (pd.DataFrame): Pandas dataframe. """ - self.columns = df.columns.tolist() - self.data = df.values.tolist() + self.columns = [df.index.name] + df.columns.astype(str).tolist() + self.data = [ + [str(index)] + row.astype(str).tolist() for index, row in df.iterrows() + ] def to_pandas(self, index: Optional[str] = None) -> pd.DataFrame: """Convert `DataFrame` to pandas dataframe. 
diff --git a/nisystemlink/clients/dataframe/utilities/__init__.py b/nisystemlink/clients/dataframe/utilities/__init__.py index 6189138f..bcf41885 100644 --- a/nisystemlink/clients/dataframe/utilities/__init__.py +++ b/nisystemlink/clients/dataframe/utilities/__init__.py @@ -5,3 +5,5 @@ query_decimated_table_data_as_pandas_df, query_table_data_as_pandas_df, ) + +# flake8: noqa diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py index 65c0df17..3aa579ba 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py @@ -1,17 +1,17 @@ import pandas as pd - -from ._pandas_utils import ( - _infer_index_column, - _infer_dataframe_columns, - _get_table_index_name, -) from nisystemlink.clients.dataframe import DataFrameClient from nisystemlink.clients.dataframe.models import ( AppendTableDataRequest, CreateTableRequest, DataFrame, - QueryTableDataRequest, QueryDecimatedDataRequest, + QueryTableDataRequest, +) + +from ._pandas_utils import ( + _get_table_index_name, + _infer_dataframe_columns, + _infer_index_column, ) @@ -57,7 +57,7 @@ def append_pandas_df_to_table( frame = DataFrame() frame.from_pandas(df) client.append_table_data( - table_id, data=AppendTableDataRequest(frame=frame, endOfData=True) + id=table_id, data=AppendTableDataRequest(frame=frame, end_of_data=False) ) @@ -81,8 +81,9 @@ def query_decimated_table_data_as_pandas_df( index_name: str = None if index: index_name = _get_table_index_name(client=client, table_id=table_id) - if query and (index_name not in query.columns): - query.columns.append(index_name) + if query.columns: + if index_name not in query.columns: + query.columns.append(index_name) response = client.query_decimated_data(table_id, query) return response.frame.to_pandas(index_name) @@ -110,8 +111,9 @@ def query_table_data_as_pandas_df( if 
index: index_name = _get_table_index_name(client=client, table_id=table_id) - if query and (index_name not in query.columns): - query.columns.append(index_name) + if query.columns: + if index_name not in query.columns: + query.columns.append(index_name) while True: response = client.query_table_data(table_id, query) diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_exception.py b/nisystemlink/clients/dataframe/utilities/_pandas_exception.py index f476f216..90a8cb31 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_exception.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_exception.py @@ -11,7 +11,9 @@ def __init__(self, index_name: str = None) -> None: self.index_name = index_name self.message = "Data frame must contain one index." if index_name: - self.message = f"Column '{self.index_name}' must be of type INT32, INT64, or TIMESTAMP to be an index column." + self.message = ( + f"Column '{self.index_name}' must be of type INT32, INT64, or TIMESTAMP to be an index column." 
+ ) super().__init__(self.message) @@ -21,7 +23,5 @@ class InvalidColumnTypeError(DataFrameError): def __init__(self, column_name: str, column_type: str) -> None: self.column_name = column_name self.column_type = column_type - self.message = ( - f"Column '{column_name}' has an unsupported datatype: {column_type}" - ) + self.message = f"Column '{column_name}' has an unsupported datatype: {column_type}" super().__init__(self.message) diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py index 40bfeee8..70bb1044 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py @@ -1,9 +1,9 @@ from typing import List, Optional, Union import pandas as pd - from nisystemlink.clients.dataframe import DataFrameClient from nisystemlink.clients.dataframe.models import Column, ColumnType, DataType + from ._pandas_exception import InvalidColumnTypeError, InvalidIndexError UNSUPPORTED_PANDAS_INT_TYPES = ["int8", "int16"] @@ -57,6 +57,7 @@ def _type_cast_column_datatype( Returns: Union[pd.Index, pd.Series]: Processed data. """ + pd_dtype = data.dtype if pd.api.types.is_unsigned_integer_dtype(data): data = pd.to_numeric(data, downcast="integer") pd_dtype = data.dtype @@ -70,7 +71,7 @@ def _type_cast_column_datatype( return data -def _infer_index_column(self, df: pd.DataFrame) -> Column: +def _infer_index_column(df: pd.DataFrame) -> Column: """Infer the index column for table creation. 
Args: @@ -87,13 +88,13 @@ def _infer_index_column(self, df: pd.DataFrame) -> Column: if not index: raise InvalidIndexError(index_name=index) - pd_dtype = df.index.dtype + pd_dtype = str(df.index.dtype) if ( pd.api.types.is_any_real_numeric_dtype(df.index) and pd_dtype not in SUPPORTED_PANDAS_DATATYPE_MAPPING ): df.index = _type_cast_column_datatype(df.index) - pd_dtype = df.index.dtype + pd_dtype = str(df.index.dtype) data_type = _pandas_dtype_to_data_type(pd_dtype) @@ -103,9 +104,7 @@ def _infer_index_column(self, df: pd.DataFrame) -> Column: return Column(name=index, data_type=data_type, column_type=ColumnType.Index) -def _infer_dataframe_columns( - self, df: pd.DataFrame, nullable_columns: bool -) -> List[Column]: +def _infer_dataframe_columns(df: pd.DataFrame, nullable_columns: bool) -> List[Column]: """Infer the columns for table creation. Args: @@ -123,13 +122,13 @@ def _infer_dataframe_columns( column_type = ColumnType.Nullable if nullable_columns else ColumnType.Normal for column_name in df.columns: - pd_dtype = df[column_name].dtype + pd_dtype = str(df[column_name].dtype) if ( pd.api.types.is_any_real_numeric_dtype(pd_dtype) and pd_dtype not in SUPPORTED_PANDAS_DATATYPE_MAPPING ): df[column_name] = _type_cast_column_datatype(df[column_name]) - pd_dtype = df[column_name].dtype + pd_dtype = str(df[column_name].dtype) data_type = _pandas_dtype_to_data_type(pd_dtype) if data_type is None: diff --git a/tests/integration/dataframe/test_pandas_utility.py b/tests/integration/dataframe/test_pandas_utility.py index 15937d26..76fc4f84 100644 --- a/tests/integration/dataframe/test_pandas_utility.py +++ b/tests/integration/dataframe/test_pandas_utility.py @@ -1,9 +1,5 @@ -# -*- coding: utf-8 -*- -from typing import List, Optional - import pandas as pd import pytest # type: ignore - from nisystemlink.clients.core import ApiException from nisystemlink.clients.dataframe import DataFrameClient from nisystemlink.clients.dataframe.models import ( @@ -15,9 +11,9 @@ 
QueryTableDataRequest, ) from nisystemlink.clients.dataframe.utilities import ( - InvalidIndexError, append_pandas_df_to_table, create_table_from_pandas_df, + InvalidIndexError, query_decimated_table_data_as_pandas_df, query_table_data_as_pandas_df, ) @@ -29,151 +25,173 @@ def client(enterprise_config): return DataFrameClient(enterprise_config) -@pytest.fixture +@pytest.fixture(scope="class") def sample_dataframe(): """Fixture for a sample pandas DataFrame.""" - frame = pd.DataFrame( columns=["index", "value", "ignore_me"], - data=[["1", "3.3", "True"], ["2", None, "False"], ["3", "1.1", "True"]], + data=[[1, "3.3", "True"], [2, "6", "False"], [3, "1.1", "True"]], ) frame.set_index("index", inplace=True) return frame -def test_create_table_from_pandas_df( - client: DataFrameClient, sample_dataframe: pd.DataFrame -): - table_name = "TestTable" - nullable_columns = True - table_id = create_table_from_pandas_df( - client, sample_dataframe, table_name, nullable_columns=nullable_columns - ) - index = None - table_columns = client.get_table_metadata(table_id).columns - for column in table_columns: - if column.column_type == ColumnType.Index: - index = column.name - break - - assert table_id is not None - assert index is not None - assert sample_dataframe.index == index +@pytest.fixture(scope="class") +def create_table(client: DataFrameClient): + """Fixture to create and delete tables in the test class.""" + tables = [] # List to keep track of created table IDs + + def _create_table(df: pd.DataFrame, table_name: str, nullable_columns: bool) -> str: + """Factory method to create tables and add to the tables list.""" + table_id = create_table_from_pandas_df( + client, df=df, table_name=table_name, nullable_columns=nullable_columns + ) + tables.append(table_id) + return table_id + + yield _create_table + + if tables: + client.delete_tables(tables) + + +@pytest.mark.enterprise +@pytest.mark.integration +class TestPandasUtility: + def test_create_table_from_pandas_df( + self, 
client: DataFrameClient, sample_dataframe: pd.DataFrame, create_table + ): + table_name = "TestTable" + nullable_columns = True + table_id = create_table( + df=sample_dataframe, + table_name=table_name, + nullable_columns=nullable_columns, + ) + index = None + table_columns = client.get_table_metadata(table_id).columns + for column in table_columns: + if column.column_type == ColumnType.Index: + index = column.name + break -def test_append_data__works(client: DataFrameClient, sample_dataframe): + assert table_id is not None + assert index is not None + assert index == "index" - id = create_table_from_pandas_df( - client, sample_dataframe, table_name="TestTable", nullable_columns=True - ) + def test__append_data__works( + self, client: DataFrameClient, sample_dataframe, create_table + ): - append_pandas_df_to_table(client, table_id=id, df=sample_dataframe) + id = create_table( + df=sample_dataframe, table_name="Test", nullable_columns=False + ) - response = client.get_table_data(id) + append_pandas_df_to_table(client=client, table_id=id, df=sample_dataframe) - assert response.total_row_count == 3 + response = client.get_table_data(id) + assert response.total_row_count == 3 -def test__write_invalid_data__raises(client: DataFrameClient, sample_dataframe): - id = create_table_from_pandas_df( - client, sample_dataframe, table_name="TestTable", nullable_columns=True - ) + def test__write_invalid_data__raises( + client: DataFrameClient, sample_dataframe, create_table + ): + id = create_table( + df=sample_dataframe, table_name="TestTable", nullable_columns=True + ) - frame = pd.DataFrame( - columns=["index", "non_existent_column"], - data=[["1", "2"], ["2", "2"], ["3", "3"]], - ) + frame = pd.DataFrame( + columns=["index", "non_existent_column"], + data=[["1", "2"], ["2", "2"], ["3", "3"]], + ) - with pytest.raises(ApiException, match="400 Bad Request"): - append_pandas_df_to_table(client, table_id=id, df=frame) + with pytest.raises(ApiException, match="400 Bad Request"): 
+ append_pandas_df_to_table(client=client, table_id=id, df=frame) + def test__create_table_with_missing_index__raises( + self, client: DataFrameClient, create_table + ): -def test__create_table_with_missing_index__raises(client: DataFrameClient): + frame = pd.DataFrame( + columns=["index", "value", "ignore_me"], + data=[["1", "3.3", "True"], ["2", "6", "False"], ["3", "1.1", "True"]], + ) - frame = pd.DataFrame( - columns=["index", "value", "ignore_me"], - data=[["1", "3.3", "True"], ["2", None, "False"], ["3", "1.1", "True"]], - ) + with pytest.raises( + InvalidIndexError, match="Data frame must contain one index." + ): + create_table(df=frame, table_name="TestTable", nullable_columns=True) + + def test__query_table_data__sorts( + self, client: DataFrameClient, sample_dataframe, create_table + ): + table_name = "TestTable" + nullable_columns = True + id = create_table( + sample_dataframe, table_name, nullable_columns=nullable_columns + ) - with pytest.raises( - InvalidIndexError, match="Data frame must contain one index." - ) as error: - id = create_table_from_pandas_df( - client, df=frame, table_name="TestTable", nullable_columns=True + frame = pd.DataFrame( + data=[[1, "2.5", "True"], [2, "1.5", "False"], [3, "2.5", "True"]], + columns=["index", "value", "ignore_me"], ) - assert str(error.value) == "Data frame must contain one index." 
+ append_pandas_df_to_table(client, table_id=id, df=frame) -def test__query_table_data__sorts(self, client: DataFrameClient, sample_dataframe): - table_name = "TestTable" - nullable_columns = True - id = create_table_from_pandas_df( - client, sample_dataframe, table_name, nullable_columns=nullable_columns - ) + response = query_table_data_as_pandas_df( + client, + table_id=id, + query=QueryTableDataRequest( + order_by=[ + ColumnOrderBy(column="value", descending=True), + ColumnOrderBy(column="ignore_me"), + ] + ), + index=True, + ) + expected_df = pd.DataFrame( + data=[[2, "1.5", "False"], [3, "2.5", "True"], [1, "2.5", "True"]], + columns=["index", "value", "ignore_me"], + ) + expected_df.set_index("index", inplace=True) - frame = pd.DataFrame( - data=[["1", "2.5", "True"], ["2", "1.5", "False"], ["3", "2.5", "True"]], - columns=["index", "value", "ignore_me"], - ) + assert response == expected_df - append_pandas_df_to_table(client, table_id=id, df=frame) + def test__query_decimated_data__works(self, client: DataFrameClient, create_table): + table_name = "TestTable" + nullable_columns = True - response = query_table_data_as_pandas_df( - client, - table_id=id, - query=QueryTableDataRequest( - order_by=[ - ColumnOrderBy(column="value", descending=True), - ColumnOrderBy(column="ignore_me"), + frame = pd.DataFrame( + data=[ + [1, 1.5, 3.5], + [2, 2.5, 2.5], + [3, 3.5, 1.5], + [4, 4.5, 4.5], ], - ), - index=True, - ) - expected_df = pd.DataFrame( - data=[["2", "1.5", "False"], ["3", "2.5", "True"], ["1", "2.5", "True"]], - columns=["index", "value", "ignore_me"], - ) - expected_df.set_index("index", inplace=True) - - assert response == expected_df - - -def test__query_decimated_data__works(client: DataFrameClient, create_table): - table_name = "TestTable" - nullable_columns = True + columns=["index", "col1", "col2"], + ) + frame.set_index("index", inplace=True) + id = create_table( + df=frame, table_name=table_name, nullable_columns=nullable_columns + ) - frame = 
pd.DataFrame( - data=[ - ["1", "1.5", "3.5"], - ["2", "2.5", "2.5"], - ["3", "3.5", "1.5"], - ["4", "4.5", "4.5"], - ], - columns=["index", "col1", "col2"], - ) - frame.set_index("index", inplace=True) - id = create_table_from_pandas_df( - client, df=frame, table_name=table_name, nullable_columns=nullable_columns - ) + append_pandas_df_to_table(client, table_id=id, df=frame) - append_pandas_df_to_table(client, table_id=id, df=frame) - - response = query_decimated_table_data_as_pandas_df( - client, - table_id=id, - query=QueryDecimatedDataRequest( - decimation=DecimationOptions( - x_column="index", - y_columns=["col1"], - intervals=1, - method=DecimationMethod.MaxMin, - ) - ), - index=True, - ) + response = query_decimated_table_data_as_pandas_df( + client, + table_id=id, + query=QueryDecimatedDataRequest( + decimation=DecimationOptions( + x_column="index", + y_columns=["col1"], + intervals=1, + method=DecimationMethod.MaxMin, + ) + ), + index=True, + ) - assert response == pd.DataFrame( - data=[["1", "1.5", "3.5"], ["4", "4.5", "4.5"]], - columns=frame.columns, - index=frame.index, - ) + assert response == pd.DataFrame( + data=[[1.5, 3.5], [4.5, 4.5]], columns=frame.columns, index=[1, 4] + ) From 29b763022fa405d36e5d90d43a0d7260a7be2039 Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Tue, 12 Nov 2024 20:06:23 +0530 Subject: [PATCH 08/12] docs: include utility docs --- docs/api_reference/dataframe.rst | 4 + docs/getting_started.rst | 19 + .../dataframe/test_pandas_utility.py | 417 +++++++++--------- 3 files changed, 243 insertions(+), 197 deletions(-) diff --git a/docs/api_reference/dataframe.rst b/docs/api_reference/dataframe.rst index 27dd7ab9..7c216764 100644 --- a/docs/api_reference/dataframe.rst +++ b/docs/api_reference/dataframe.rst @@ -25,3 +25,7 @@ nisystemlink.clients.dataframe .. automodule:: nisystemlink.clients.dataframe.models :members: :imported-members: + +.. 
automodule:: nisystemlink.clients.dataframe.utilities + :members: + :imported-members: diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 0efe9358..f0473792 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -100,6 +100,19 @@ With a :class:`.DataFrameClient` object, you can: * Export table data in a comma-separated values (CSV) format. +Pandas Utility +~~~~~~~~~~~~~~ + +Utility functions for managing Pandas DataFrames and interacting with the DataFrame API include: + +* Create a table from a pandas dataframe. + +* Append pandas dataframe to an existing table. + +* Query decimated data from a table as pandas dataframe. + +* Query data from a table as pandas dataframe. + Examples ~~~~~~~~ @@ -121,6 +134,12 @@ Export data from a table :language: python :linenos: +Table operations using pandas dataframe + +.. literalinclude:: ../examples/dataframe/pandas_dataframe_operations.py + :language: python + :linenos: + Spec API ------- diff --git a/tests/integration/dataframe/test_pandas_utility.py b/tests/integration/dataframe/test_pandas_utility.py index 76fc4f84..341e6854 100644 --- a/tests/integration/dataframe/test_pandas_utility.py +++ b/tests/integration/dataframe/test_pandas_utility.py @@ -1,197 +1,220 @@ -import pandas as pd -import pytest # type: ignore -from nisystemlink.clients.core import ApiException -from nisystemlink.clients.dataframe import DataFrameClient -from nisystemlink.clients.dataframe.models import ( - ColumnOrderBy, - ColumnType, - DecimationMethod, - DecimationOptions, - QueryDecimatedDataRequest, - QueryTableDataRequest, -) -from nisystemlink.clients.dataframe.utilities import ( - append_pandas_df_to_table, - create_table_from_pandas_df, - InvalidIndexError, - query_decimated_table_data_as_pandas_df, - query_table_data_as_pandas_df, -) - - -@pytest.fixture(scope="class") -def client(enterprise_config): - """Fixture to create a DataFrameClient instance.""" - return DataFrameClient(enterprise_config) - - 
-@pytest.fixture(scope="class") -def sample_dataframe(): - """Fixture for a sample pandas DataFrame.""" - frame = pd.DataFrame( - columns=["index", "value", "ignore_me"], - data=[[1, "3.3", "True"], [2, "6", "False"], [3, "1.1", "True"]], - ) - frame.set_index("index", inplace=True) - return frame - - -@pytest.fixture(scope="class") -def create_table(client: DataFrameClient): - """Fixture to create and delete tables in the test class.""" - tables = [] # List to keep track of created table IDs - - def _create_table(df: pd.DataFrame, table_name: str, nullable_columns: bool) -> str: - """Factory method to create tables and add to the tables list.""" - table_id = create_table_from_pandas_df( - client, df=df, table_name=table_name, nullable_columns=nullable_columns - ) - tables.append(table_id) - return table_id - - yield _create_table - - if tables: - client.delete_tables(tables) - - -@pytest.mark.enterprise -@pytest.mark.integration -class TestPandasUtility: - def test_create_table_from_pandas_df( - self, client: DataFrameClient, sample_dataframe: pd.DataFrame, create_table - ): - table_name = "TestTable" - nullable_columns = True - table_id = create_table( - df=sample_dataframe, - table_name=table_name, - nullable_columns=nullable_columns, - ) - - index = None - table_columns = client.get_table_metadata(table_id).columns - for column in table_columns: - if column.column_type == ColumnType.Index: - index = column.name - break - - assert table_id is not None - assert index is not None - assert index == "index" - - def test__append_data__works( - self, client: DataFrameClient, sample_dataframe, create_table - ): - - id = create_table( - df=sample_dataframe, table_name="Test", nullable_columns=False - ) - - append_pandas_df_to_table(client=client, table_id=id, df=sample_dataframe) - - response = client.get_table_data(id) - - assert response.total_row_count == 3 - - def test__write_invalid_data__raises( - client: DataFrameClient, sample_dataframe, create_table - ): - id = 
create_table( - df=sample_dataframe, table_name="TestTable", nullable_columns=True - ) - - frame = pd.DataFrame( - columns=["index", "non_existent_column"], - data=[["1", "2"], ["2", "2"], ["3", "3"]], - ) - - with pytest.raises(ApiException, match="400 Bad Request"): - append_pandas_df_to_table(client=client, table_id=id, df=frame) - - def test__create_table_with_missing_index__raises( - self, client: DataFrameClient, create_table - ): - - frame = pd.DataFrame( - columns=["index", "value", "ignore_me"], - data=[["1", "3.3", "True"], ["2", "6", "False"], ["3", "1.1", "True"]], - ) - - with pytest.raises( - InvalidIndexError, match="Data frame must contain one index." - ): - create_table(df=frame, table_name="TestTable", nullable_columns=True) - - def test__query_table_data__sorts( - self, client: DataFrameClient, sample_dataframe, create_table - ): - table_name = "TestTable" - nullable_columns = True - id = create_table( - sample_dataframe, table_name, nullable_columns=nullable_columns - ) - - frame = pd.DataFrame( - data=[[1, "2.5", "True"], [2, "1.5", "False"], [3, "2.5", "True"]], - columns=["index", "value", "ignore_me"], - ) - - append_pandas_df_to_table(client, table_id=id, df=frame) - - response = query_table_data_as_pandas_df( - client, - table_id=id, - query=QueryTableDataRequest( - order_by=[ - ColumnOrderBy(column="value", descending=True), - ColumnOrderBy(column="ignore_me"), - ] - ), - index=True, - ) - expected_df = pd.DataFrame( - data=[[2, "1.5", "False"], [3, "2.5", "True"], [1, "2.5", "True"]], - columns=["index", "value", "ignore_me"], - ) - expected_df.set_index("index", inplace=True) - - assert response == expected_df - - def test__query_decimated_data__works(self, client: DataFrameClient, create_table): - table_name = "TestTable" - nullable_columns = True - - frame = pd.DataFrame( - data=[ - [1, 1.5, 3.5], - [2, 2.5, 2.5], - [3, 3.5, 1.5], - [4, 4.5, 4.5], - ], - columns=["index", "col1", "col2"], - ) - frame.set_index("index", inplace=True) - 
id = create_table( - df=frame, table_name=table_name, nullable_columns=nullable_columns - ) - - append_pandas_df_to_table(client, table_id=id, df=frame) - - response = query_decimated_table_data_as_pandas_df( - client, - table_id=id, - query=QueryDecimatedDataRequest( - decimation=DecimationOptions( - x_column="index", - y_columns=["col1"], - intervals=1, - method=DecimationMethod.MaxMin, - ) - ), - index=True, - ) - - assert response == pd.DataFrame( - data=[[1.5, 3.5], [4.5, 4.5]], columns=frame.columns, index=[1, 4] - ) +# import pandas as pd +# import pytest # type: ignore +# from nisystemlink.clients.core import ApiException +# from nisystemlink.clients.dataframe import DataFrameClient +# from nisystemlink.clients.dataframe.models import ( +# ColumnOrderBy, +# ColumnType, +# DecimationMethod, +# DecimationOptions, +# QueryDecimatedDataRequest, +# QueryTableDataRequest, +# ) +# from nisystemlink.clients.dataframe.utilities import ( +# append_pandas_df_to_table, +# create_table_from_pandas_df, +# InvalidIndexError, +# query_decimated_table_data_as_pandas_df, +# query_table_data_as_pandas_df, +# ) + + +# @pytest.fixture(scope="class") +# def client(enterprise_config): +# """Fixture to create a DataFrameClient instance.""" +# return DataFrameClient(enterprise_config) + + +# @pytest.fixture(scope="class") +# def sample_dataframe(): +# """Fixture for a sample pandas DataFrame.""" +# columns = ["index", "value", "ignore_me"] +# data = [ +# [1, "3.3", "True"], +# [2, "6", "False"], +# [3, "1.1", "True"], +# ] + +# frame = pd.DataFrame(columns=columns, data=data) +# frame.set_index("index", inplace=True) + +# return frame + + +# @pytest.fixture(scope="class") +# def create_table(client: DataFrameClient): +# """Fixture to create and delete tables in the test class.""" +# tables = [] + +# def _create_table(df: pd.DataFrame, table_name: str, nullable_columns: bool) -> str: +# """Factory method to create tables and add to the tables list.""" +# table_id = 
create_table_from_pandas_df( +# client, df=df, table_name=table_name, nullable_columns=nullable_columns +# ) +# tables.append(table_id) +# return table_id + +# yield _create_table + +# if tables: +# client.delete_tables(tables) + + +# @pytest.mark.enterprise +# @pytest.mark.integration +# class TestPandasUtility: +# def test_create_table_from_pandas_df( +# self, client: DataFrameClient, sample_dataframe: pd.DataFrame, create_table +# ): +# table_name = "TestTable0" +# nullable_columns = True + +# table_id = create_table( +# df=sample_dataframe, +# table_name=table_name, +# nullable_columns=nullable_columns, +# ) + +# index = None +# table_data = client.get_table_metadata(table_id) +# table_columns = table_data.columns + +# for column in table_columns: +# if column.column_type == ColumnType.Index: +# index = column.name +# break + +# assert table_id is not None +# assert index == "index" +# assert table_data.row_count == 0 + +# def test__create_table_with_missing_index__raises( +# self, client: DataFrameClient, create_table +# ): + +# frame = pd.DataFrame( +# columns=["index", "value", "ignore_me"], +# data=[["1", "3.3", "True"], ["2", "6", "False"], ["3", "1.1", "True"]], +# ) + +# with pytest.raises( +# InvalidIndexError, match="Data frame must contain one index." 
+# ): +# create_table(df=frame, table_name="TestTable1", nullable_columns=True) + +# def test__append_data__works( +# self, client: DataFrameClient, sample_dataframe, create_table +# ): + +# id = create_table( +# df=sample_dataframe, table_name="TestTable2", nullable_columns=False +# ) + +# append_pandas_df_to_table(client=client, table_id=id, df=sample_dataframe) + +# response = client.get_table_data(id) + +# assert response.total_row_count == 3 + +# def test__append_pandas_df_to_table_with_invalid_data__raises( +# client: DataFrameClient, sample_dataframe, create_table +# ): +# id = create_table( +# df=sample_dataframe, table_name="TestTable3", nullable_columns=True +# ) + +# columns = ["index", "non_existent_column"] +# data = [ +# [1, 2], +# [2, 2], +# [3, 3], +# ] +# frame = pd.DataFrame(columns=columns, data=data) + +# with pytest.raises(ApiException, match="400 Bad Request"): +# append_pandas_df_to_table(client, table_id=id, df=frame) + + +# def test__append_pandas_df_to_table__raises( +# client: DataFrameClient, sample_dataframe, create_table +# ): +# id = create_table( +# df=sample_dataframe, table_name="TestTable3", nullable_columns=True +# ) + +# with pytest.raises(ApiException, match="400 Bad Request"): +# append_pandas_df_to_table(client, table_id=id, df=sample_dataframe) + +# def test__query_table_data__sorts( +# self, client: DataFrameClient, sample_dataframe, create_table +# ): +# table_name = "TestTable4" +# nullable_columns = True +# id = create_table( +# sample_dataframe, table_name, nullable_columns=nullable_columns +# ) + +# frame = pd.DataFrame( +# data=[[1, "2.5", "True"], [2, "1.5", "False"], [3, "2.5", "True"]], +# columns=["index", "value", "ignore_me"], +# ) + +# append_pandas_df_to_table(client, table_id=id, df=frame) + +# response = query_table_data_as_pandas_df( +# client, +# table_id=id, +# query=QueryTableDataRequest( +# order_by=[ +# ColumnOrderBy(column="value", descending=True), +# ColumnOrderBy(column="ignore_me"), +# ] +# ), +# 
index=True, +# ) +# expected_df = pd.DataFrame( +# data=[[2, "1.5", "False"], [3, "2.5", "True"], [1, "2.5", "True"]], +# columns=["index", "value", "ignore_me"], +# ) +# expected_df.set_index("index", inplace=True) + +# assert response == expected_df + +# def test__query_decimated_data__works(self, client: DataFrameClient, create_table): +# table_name = "TestTable5" +# nullable_columns = True + +# frame = pd.DataFrame( +# data=[ +# [1, 1.5, 3.5], +# [2, 2.5, 2.5], +# [3, 3.5, 1.5], +# [4, 4.5, 4.5], +# ], +# columns=["index", "col1", "col2"], +# ) +# frame.set_index("index", inplace=True) + +# id = create_table( +# df=frame, table_name=table_name, nullable_columns=nullable_columns +# ) + +# append_pandas_df_to_table(client, table_id=id, df=frame) + +# response = query_decimated_table_data_as_pandas_df( +# client, +# table_id=id, +# query=QueryDecimatedDataRequest( +# decimation=DecimationOptions( +# x_column="index", +# y_columns=["col1"], +# intervals=1, +# method=DecimationMethod.MaxMin, +# ) +# ), +# index=True, +# ) + +# assert response == pd.DataFrame( +# data=[[1.5, 3.5], [4.5, 4.5]], columns=frame.columns, index=[1, 4] +# ) From 8ca6389b129b7414f1f7730c6c3f92e904eec6a6 Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Wed, 13 Nov 2024 00:14:22 +0530 Subject: [PATCH 09/12] refactor: rename test functions --- .../dataframe/pandas_dataframe_operations.py | 5 +- .../dataframe/test_pandas_utility.py | 413 ++++++++---------- 2 files changed, 197 insertions(+), 221 deletions(-) diff --git a/examples/dataframe/pandas_dataframe_operations.py b/examples/dataframe/pandas_dataframe_operations.py index 58e606e4..9018be59 100644 --- a/examples/dataframe/pandas_dataframe_operations.py +++ b/examples/dataframe/pandas_dataframe_operations.py @@ -1,5 +1,6 @@ import pandas as pd from nisystemlink.clients.dataframe import DataFrameClient +from nisystemlink.clients.core import HttpConfiguration from nisystemlink.clients.dataframe.models import ( DecimationMethod, 
DecimationOptions, @@ -45,7 +46,7 @@ client, table_id, query=request, index=True ) print("Queried decimated data as pandas dataframe:") -print(queried_decimated_df) +print(queried_decimated_df.columns) query = QueryTableDataRequest() queried_df = query_table_data_as_pandas_df( @@ -53,3 +54,5 @@ ) print("Queried table data as pandas dataframe:") print(queried_df) + +client.delete_table(table_id) \ No newline at end of file diff --git a/tests/integration/dataframe/test_pandas_utility.py b/tests/integration/dataframe/test_pandas_utility.py index 341e6854..73b76e85 100644 --- a/tests/integration/dataframe/test_pandas_utility.py +++ b/tests/integration/dataframe/test_pandas_utility.py @@ -1,220 +1,193 @@ -# import pandas as pd -# import pytest # type: ignore -# from nisystemlink.clients.core import ApiException -# from nisystemlink.clients.dataframe import DataFrameClient -# from nisystemlink.clients.dataframe.models import ( -# ColumnOrderBy, -# ColumnType, -# DecimationMethod, -# DecimationOptions, -# QueryDecimatedDataRequest, -# QueryTableDataRequest, -# ) -# from nisystemlink.clients.dataframe.utilities import ( -# append_pandas_df_to_table, -# create_table_from_pandas_df, -# InvalidIndexError, -# query_decimated_table_data_as_pandas_df, -# query_table_data_as_pandas_df, -# ) - - -# @pytest.fixture(scope="class") -# def client(enterprise_config): -# """Fixture to create a DataFrameClient instance.""" -# return DataFrameClient(enterprise_config) - - -# @pytest.fixture(scope="class") -# def sample_dataframe(): -# """Fixture for a sample pandas DataFrame.""" -# columns = ["index", "value", "ignore_me"] -# data = [ -# [1, "3.3", "True"], -# [2, "6", "False"], -# [3, "1.1", "True"], -# ] - -# frame = pd.DataFrame(columns=columns, data=data) -# frame.set_index("index", inplace=True) - -# return frame - - -# @pytest.fixture(scope="class") -# def create_table(client: DataFrameClient): -# """Fixture to create and delete tables in the test class.""" -# tables = [] - -# def 
_create_table(df: pd.DataFrame, table_name: str, nullable_columns: bool) -> str: -# """Factory method to create tables and add to the tables list.""" -# table_id = create_table_from_pandas_df( -# client, df=df, table_name=table_name, nullable_columns=nullable_columns -# ) -# tables.append(table_id) -# return table_id - -# yield _create_table - -# if tables: -# client.delete_tables(tables) - - -# @pytest.mark.enterprise -# @pytest.mark.integration -# class TestPandasUtility: -# def test_create_table_from_pandas_df( -# self, client: DataFrameClient, sample_dataframe: pd.DataFrame, create_table -# ): -# table_name = "TestTable0" -# nullable_columns = True - -# table_id = create_table( -# df=sample_dataframe, -# table_name=table_name, -# nullable_columns=nullable_columns, -# ) - -# index = None -# table_data = client.get_table_metadata(table_id) -# table_columns = table_data.columns - -# for column in table_columns: -# if column.column_type == ColumnType.Index: -# index = column.name -# break - -# assert table_id is not None -# assert index == "index" -# assert table_data.row_count == 0 - -# def test__create_table_with_missing_index__raises( -# self, client: DataFrameClient, create_table -# ): - -# frame = pd.DataFrame( -# columns=["index", "value", "ignore_me"], -# data=[["1", "3.3", "True"], ["2", "6", "False"], ["3", "1.1", "True"]], -# ) - -# with pytest.raises( -# InvalidIndexError, match="Data frame must contain one index." 
-# ): -# create_table(df=frame, table_name="TestTable1", nullable_columns=True) - -# def test__append_data__works( -# self, client: DataFrameClient, sample_dataframe, create_table -# ): - -# id = create_table( -# df=sample_dataframe, table_name="TestTable2", nullable_columns=False -# ) - -# append_pandas_df_to_table(client=client, table_id=id, df=sample_dataframe) - -# response = client.get_table_data(id) - -# assert response.total_row_count == 3 - -# def test__append_pandas_df_to_table_with_invalid_data__raises( -# client: DataFrameClient, sample_dataframe, create_table -# ): -# id = create_table( -# df=sample_dataframe, table_name="TestTable3", nullable_columns=True -# ) - -# columns = ["index", "non_existent_column"] -# data = [ -# [1, 2], -# [2, 2], -# [3, 3], -# ] -# frame = pd.DataFrame(columns=columns, data=data) - -# with pytest.raises(ApiException, match="400 Bad Request"): -# append_pandas_df_to_table(client, table_id=id, df=frame) - - -# def test__append_pandas_df_to_table__raises( -# client: DataFrameClient, sample_dataframe, create_table -# ): -# id = create_table( -# df=sample_dataframe, table_name="TestTable3", nullable_columns=True -# ) - -# with pytest.raises(ApiException, match="400 Bad Request"): -# append_pandas_df_to_table(client, table_id=id, df=sample_dataframe) - -# def test__query_table_data__sorts( -# self, client: DataFrameClient, sample_dataframe, create_table -# ): -# table_name = "TestTable4" -# nullable_columns = True -# id = create_table( -# sample_dataframe, table_name, nullable_columns=nullable_columns -# ) - -# frame = pd.DataFrame( -# data=[[1, "2.5", "True"], [2, "1.5", "False"], [3, "2.5", "True"]], -# columns=["index", "value", "ignore_me"], -# ) - -# append_pandas_df_to_table(client, table_id=id, df=frame) - -# response = query_table_data_as_pandas_df( -# client, -# table_id=id, -# query=QueryTableDataRequest( -# order_by=[ -# ColumnOrderBy(column="value", descending=True), -# ColumnOrderBy(column="ignore_me"), -# ] -# ), -# 
index=True, -# ) -# expected_df = pd.DataFrame( -# data=[[2, "1.5", "False"], [3, "2.5", "True"], [1, "2.5", "True"]], -# columns=["index", "value", "ignore_me"], -# ) -# expected_df.set_index("index", inplace=True) - -# assert response == expected_df - -# def test__query_decimated_data__works(self, client: DataFrameClient, create_table): -# table_name = "TestTable5" -# nullable_columns = True - -# frame = pd.DataFrame( -# data=[ -# [1, 1.5, 3.5], -# [2, 2.5, 2.5], -# [3, 3.5, 1.5], -# [4, 4.5, 4.5], -# ], -# columns=["index", "col1", "col2"], -# ) -# frame.set_index("index", inplace=True) - -# id = create_table( -# df=frame, table_name=table_name, nullable_columns=nullable_columns -# ) - -# append_pandas_df_to_table(client, table_id=id, df=frame) - -# response = query_decimated_table_data_as_pandas_df( -# client, -# table_id=id, -# query=QueryDecimatedDataRequest( -# decimation=DecimationOptions( -# x_column="index", -# y_columns=["col1"], -# intervals=1, -# method=DecimationMethod.MaxMin, -# ) -# ), -# index=True, -# ) - -# assert response == pd.DataFrame( -# data=[[1.5, 3.5], [4.5, 4.5]], columns=frame.columns, index=[1, 4] -# ) +import pandas as pd +import pytest # type: ignore +from nisystemlink.clients.core import ApiException +from nisystemlink.clients.dataframe import DataFrameClient +from nisystemlink.clients.dataframe.models import ( + ColumnOrderBy, + ColumnType, + DecimationMethod, + DecimationOptions, + QueryDecimatedDataRequest, + QueryTableDataRequest, +) +from nisystemlink.clients.dataframe.utilities import ( + append_pandas_df_to_table, + create_table_from_pandas_df, + InvalidIndexError, + query_decimated_table_data_as_pandas_df, + query_table_data_as_pandas_df, +) + + +@pytest.fixture(scope="class") +def client(enterprise_config): + """Fixture to create a DataFrameClient instance.""" + return DataFrameClient(enterprise_config) + + +@pytest.fixture(scope="class") +def sample_dataframe(): + """Fixture for a sample pandas DataFrame.""" + columns = 
["index", "value", "ignore_me"] + data = [ + [1, "3.3", "True"], + [2, "6", "False"], + [3, "1.1", "True"], + ] + + frame = pd.DataFrame(columns=columns, data=data) + frame.set_index("index", inplace=True) + + return frame + + +@pytest.fixture(scope="class") +def create_table(client: DataFrameClient): + """Fixture to create and delete tables in the test class.""" + tables = [] + + def _create_table(df: pd.DataFrame, table_name: str, nullable_columns: bool) -> str: + """Factory method to create tables and add to the tables list.""" + table_id = create_table_from_pandas_df( + client, df=df, table_name=table_name, nullable_columns=nullable_columns + ) + tables.append(table_id) + return table_id + + yield _create_table + + client.delete_tables(tables) + + +@pytest.mark.enterprise +@pytest.mark.integration +class TestPandasUtility: + def test_create_table_from_pandas_df( + self, client: DataFrameClient, sample_dataframe: pd.DataFrame, create_table + ): + table_name = "TestTable1" + nullable_columns = True + + table_id = create_table( + df=sample_dataframe, + table_name=table_name, + nullable_columns=nullable_columns, + ) + + index = None + table_data = client.get_table_metadata(table_id) + table_columns = table_data.columns + + for column in table_columns: + if column.column_type == ColumnType.Index: + index = column.name + break + + assert table_id is not None + assert index == "index" + assert table_data.row_count == 0 + + def test__create_table_with_missing_index__raises( + self, client: DataFrameClient, create_table + ): + + frame = pd.DataFrame( + columns=["index", "value", "ignore_me"], + data=[["1", "3.3", "True"], ["2", "6", "False"], ["3", "1.1", "True"]], + ) + + with pytest.raises( + InvalidIndexError, match="Data frame must contain one index." 
+ ): + create_table(df=frame, table_name="TestTable2", nullable_columns=True) + + def test__append_data__works( + self, client: DataFrameClient, sample_dataframe, create_table + ): + + id = create_table( + df=sample_dataframe, table_name="TestTable3", nullable_columns=False + ) + + append_pandas_df_to_table(client=client, table_id=id, df=sample_dataframe) + + response = client.get_table_data(id) + + assert response.total_row_count == 3 + + def test__append_pandas_df_to_table__raises( + client: DataFrameClient, sample_dataframe, create_table + ): + id = create_table( + df=sample_dataframe, table_name="TestTable3", nullable_columns=True + ) + + with pytest.raises(ApiException, match="400 Bad Request"): + append_pandas_df_to_table(client, table_id=id, df=sample_dataframe) + + def test__query_table_data__sorts( + self, client: DataFrameClient, sample_dataframe, create_table + ): + table_name = "TestTable4" + nullable_columns = True + id = create_table( + sample_dataframe, table_name, nullable_columns=nullable_columns + ) + + frame = pd.DataFrame( + data=[[1, "2.5", "True"], [2, "1.5", "False"], [3, "2.5", "True"]], + columns=["index", "value", "ignore_me"], + ) + + append_pandas_df_to_table(client, table_id=id, df=frame) + + response = query_table_data_as_pandas_df( + client, + table_id=id, + query=QueryTableDataRequest( + order_by=[ + ColumnOrderBy(column="value", descending=True), + ColumnOrderBy(column="ignore_me"), + ] + ), + index=True, + ) + expected_df = pd.DataFrame( + data=[[2, "1.5", "False"], [3, "2.5", "True"], [1, "2.5", "True"]], + columns=["index", "value", "ignore_me"], + ) + expected_df.set_index("index", inplace=True) + + assert response == expected_df + + def test__query_decimated_data__works(self, client: DataFrameClient, create_table): + table_name = "TestTable5" + nullable_columns = True + + frame: pd.DataFrame = pd.DataFrame( + data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] + ) + frame.set_index("a", inplace=True) + id = 
create_table( + df=frame, table_name=table_name, nullable_columns=nullable_columns + ) + + append_pandas_df_to_table(client, table_id=id, df=frame) + + response = query_decimated_table_data_as_pandas_df( + client, + table_id=id, + query=QueryDecimatedDataRequest( + decimation=DecimationOptions( + x_column="a", + y_columns=["b"], + intervals=1, + method=DecimationMethod.MaxMin, + ) + ), + index=True, + ) + expected_df = pd.DataFrame(data=[['1','2','3'],['7','8','9']], columns=["a","b","c"]) + expected_df.set_index("a", inplace=True) + + assert response.values == expected_df.values From 303de9f7dec2621fd4f5b5d3ea4f4ab23ec4381f Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Mon, 25 Nov 2024 15:28:23 +0530 Subject: [PATCH 10/12] fix: review comments --- docs/getting_started.rst | 12 +- .../dataframe/pandas_dataframe_operations.py | 6 +- .../clients/dataframe/models/_data_frame.py | 2 +- .../utilities/_pandas_dataframe_operations.py | 56 +++++++-- .../dataframe/utilities/_pandas_exception.py | 12 +- .../dataframe/utilities/_pandas_utils.py | 22 ++-- poetry.lock | 116 +++++++++--------- .../dataframe/test_pandas_utility.py | 21 ++-- 8 files changed, 148 insertions(+), 99 deletions(-) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index f0473792..4249f284 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -103,15 +103,15 @@ With a :class:`.DataFrameClient` object, you can: Pandas Utility ~~~~~~~~~~~~~~ -Utility functions for managing Pandas DataFrames and interacting with the DataFrame API include: +Utility functions to interact with :class:`.DataFrameClient` using [pandas.DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) -* Create a table from a pandas dataframe. +* Create a table from a `pandas.DataFrame`. -* Append pandas dataframe to an existing table. +* Append `pandas.DataFrame` to an existing table. -* Query decimated data from a table as pandas dataframe. 
+* Query decimated data from a table as `pandas.DataFrame`. -* Query data from a table as pandas dataframe. +* Query data from a table as `pandas.DataFrame`. Examples ~~~~~~~~ @@ -134,7 +134,7 @@ Export data from a table :language: python :linenos: -Table operations using pandas dataframe +Table operations using [pandas.DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) .. literalinclude:: ../examples/dataframe/pandas_dataframe_operations.py :language: python diff --git a/examples/dataframe/pandas_dataframe_operations.py b/examples/dataframe/pandas_dataframe_operations.py index 9018be59..3245b4f4 100644 --- a/examples/dataframe/pandas_dataframe_operations.py +++ b/examples/dataframe/pandas_dataframe_operations.py @@ -1,6 +1,5 @@ import pandas as pd from nisystemlink.clients.dataframe import DataFrameClient -from nisystemlink.clients.core import HttpConfiguration from nisystemlink.clients.dataframe.models import ( DecimationMethod, DecimationOptions, @@ -21,7 +20,10 @@ data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] ) df.set_index("a", inplace=True) +print(df) +print(client.list_tables()) +client.list_tables() try: table_id = create_table_from_pandas_df( client, df, "Example Table", nullable_columns=False @@ -55,4 +57,4 @@ print("Queried table data as pandas dataframe:") print(queried_df) -client.delete_table(table_id) \ No newline at end of file +client.delete_table(table_id) diff --git a/nisystemlink/clients/dataframe/models/_data_frame.py b/nisystemlink/clients/dataframe/models/_data_frame.py index a257fa26..61202ea1 100644 --- a/nisystemlink/clients/dataframe/models/_data_frame.py +++ b/nisystemlink/clients/dataframe/models/_data_frame.py @@ -53,7 +53,7 @@ class DataFrame(JsonModel): columns: Optional[List[str]] = None """The names and order of the columns included in the data frame.""" - data: List[List[Optional[str]]] = None + data: Optional[List[List[Optional[str]]]] = None """The data for each row 
with the order specified in the columns property. Must contain a value for each column in the columns property.""" diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py index 3aa579ba..26797621 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py @@ -1,3 +1,5 @@ +from typing import Optional + import pandas as pd from nisystemlink.clients.dataframe import DataFrameClient from nisystemlink.clients.dataframe.models import ( @@ -24,7 +26,7 @@ def create_table_from_pandas_df( client (DataFrameClient): Instance of DataFrameClient. df (pd.DataFrame): Pandas dataframe. table_name (str): Name of the table. - nullable_columns (bool): Make the columns nullable. + nullable_columns (bool): Make the columns nullable. Nullable columns can contain `null` values. Returns: str: ID of the table. @@ -42,7 +44,10 @@ def create_table_from_pandas_df( def append_pandas_df_to_table( - client: DataFrameClient, table_id: str, df: pd.DataFrame + client: DataFrameClient, + table_id: str, + df: pd.DataFrame, + end_of_data: Optional[bool] = None, ) -> None: """Append `df` to table. @@ -54,12 +59,48 @@ def append_pandas_df_to_table( Returns: None """ - frame = DataFrame() + frame: DataFrame = DataFrame() frame.from_pandas(df) client.append_table_data( - id=table_id, data=AppendTableDataRequest(frame=frame, end_of_data=False) + id=table_id, data=AppendTableDataRequest(frame=frame, end_of_data=end_of_data) + ) + + +def create_table_with_data_from_pandas_df( + client: DataFrameClient, + df: pd.DataFrame, + table_name: str, + nullable_columns: bool, + batch_size: int = 1000, + end_of_data: Optional[bool] = None, +) -> str: + """Create a table and upload data from a pandas DataFrame. 
+ + This function creates the table, uploads the data (with batching for large data), + and closes the upload process in one seamless call. + + Args: + client (DataFrameClient): Instance of DataFrameClient. + df (pd.DataFrame): Pandas DataFrame with data to upload. + table_name (str): Name of the table to create. + nullable_columns (bool): Make the columns nullable. Nullable columns can contain `null` values. + batch_size (Optional[int]): Number of rows to batch in each upload. Default is 1000. + + Returns: + str: ID of the created table. + """ + table_id = create_table_from_pandas_df( + client=client, df=df, table_name=table_name, nullable_columns=nullable_columns ) + num_rows = len(df) + for start_row in range(0, num_rows, batch_size): + end_row = min(start_row + batch_size, num_rows) + batch_df = df.iloc[start_row:end_row] + append_pandas_df_to_table(client, table_id, batch_df, end_of_data) + + return table_id + def query_decimated_table_data_as_pandas_df( client: DataFrameClient, @@ -78,10 +119,10 @@ def query_decimated_table_data_as_pandas_df( Returns: pd.DataFrame: Table data in pandas dataframe format. 
""" - index_name: str = None + index_name = None if index: index_name = _get_table_index_name(client=client, table_id=table_id) - if query.columns: + if query.columns and index_name: if index_name not in query.columns: query.columns.append(index_name) response = client.query_decimated_data(table_id, query) @@ -107,11 +148,10 @@ def query_table_data_as_pandas_df( """ continuation_token = None all_rows = [] - index_name: str = None if index: index_name = _get_table_index_name(client=client, table_id=table_id) - if query.columns: + if query.columns and index_name: if index_name not in query.columns: query.columns.append(index_name) diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_exception.py b/nisystemlink/clients/dataframe/utilities/_pandas_exception.py index 90a8cb31..5f0069fd 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_exception.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_exception.py @@ -1,3 +1,6 @@ +SUPPORTED_INDEX_DATA_TYPE = ["INT32", "INT64", "TIMESTAMP"] + + class DataFrameError(Exception): """Base class for Dataframe errors.""" @@ -11,9 +14,8 @@ def __init__(self, index_name: str = None) -> None: self.index_name = index_name self.message = "Data frame must contain one index." if index_name: - self.message = ( - f"Column '{self.index_name}' must be of type INT32, INT64, or TIMESTAMP to be an index column." - ) + self.message = f"Column '{self.index_name}' must be of type {SUPPORTED_INDEX_DATA_TYPE}" + " to be an index column." 
super().__init__(self.message) @@ -23,5 +25,7 @@ class InvalidColumnTypeError(DataFrameError): def __init__(self, column_name: str, column_type: str) -> None: self.column_name = column_name self.column_type = column_type - self.message = f"Column '{column_name}' has an unsupported datatype: {column_type}" + self.message = ( + f"Column '{column_name}' has an unsupported datatype: {column_type}" + ) super().__init__(self.message) diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py index 70bb1044..0df93453 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_utils.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_utils.py @@ -6,11 +6,12 @@ from ._pandas_exception import InvalidColumnTypeError, InvalidIndexError -UNSUPPORTED_PANDAS_INT_TYPES = ["int8", "int16"] -"""List of unsupported pandas integer types for conversion to `DataType`.""" - -UNSUPPORTED_PANDAS_FLOAT_TYPES = ["float16"] -"""List of unsupported pandas float types for conversion to `DataType`.""" +UNSUPPORTED_PANDAS_DATA_TYPE_CONVERSION = { + "int8": "int32", + "int16": "int32", + "float16": "float32", +} +"""Mapping of unsupported pandas types to supported data types for `DataType`.""" SUPPORTED_INDEX_DATA_TYPE = [DataType.Int32, DataType.Int64, DataType.Timestamp] """List of supported index data types for table creation. @@ -62,11 +63,8 @@ def _type_cast_column_datatype( data = pd.to_numeric(data, downcast="integer") pd_dtype = data.dtype - if pd_dtype in UNSUPPORTED_PANDAS_INT_TYPES: - data = data.astype("int32") - - elif pd_dtype in UNSUPPORTED_PANDAS_FLOAT_TYPES: - data = data.astype("float32") + if pd_dtype in UNSUPPORTED_PANDAS_DATA_TYPE_CONVERSION: + data = data.astype(UNSUPPORTED_PANDAS_DATA_TYPE_CONVERSION[pd_dtype]) return data @@ -81,7 +79,7 @@ def _infer_index_column(df: pd.DataFrame) -> Column: InvalidIndexError: If multiple index present or index is of unsupported type. 
Returns: - Column: Valid `Column` to the table. + Column: Valid Index `Column` for the table. """ index = df.index.name @@ -140,7 +138,7 @@ def _infer_dataframe_columns(df: pd.DataFrame, nullable_columns: bool) -> List[C return columns -def _get_table_index_name(client: DataFrameClient, table_id: str) -> str: +def _get_table_index_name(client: DataFrameClient, table_id: str) -> Optional[str]: """Get the index name from the table columns. Args: diff --git a/poetry.lock b/poetry.lock index 8d5d51b6..2f1357ee 100644 --- a/poetry.lock +++ b/poetry.lock @@ -565,13 +565,13 @@ files = [ [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -744,54 +744,54 @@ files = [ [[package]] name = "pydantic" -version = "1.10.18" +version = "1.10.19" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e405ffcc1254d76bb0e760db101ee8916b620893e6edfbfee563b3c6f7a67c02"}, - {file = "pydantic-1.10.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e306e280ebebc65040034bff1a0a81fd86b2f4f05daac0131f29541cafd80b80"}, - {file = "pydantic-1.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11d9d9b87b50338b1b7de4ebf34fd29fdb0d219dc07ade29effc74d3d2609c62"}, - {file = 
"pydantic-1.10.18-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b661ce52c7b5e5f600c0c3c5839e71918346af2ef20062705ae76b5c16914cab"}, - {file = "pydantic-1.10.18-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c20f682defc9ef81cd7eaa485879ab29a86a0ba58acf669a78ed868e72bb89e0"}, - {file = "pydantic-1.10.18-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c5ae6b7c8483b1e0bf59e5f1843e4fd8fd405e11df7de217ee65b98eb5462861"}, - {file = "pydantic-1.10.18-cp310-cp310-win_amd64.whl", hash = "sha256:74fe19dda960b193b0eb82c1f4d2c8e5e26918d9cda858cbf3f41dd28549cb70"}, - {file = "pydantic-1.10.18-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72fa46abace0a7743cc697dbb830a41ee84c9db8456e8d77a46d79b537efd7ec"}, - {file = "pydantic-1.10.18-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ef0fe7ad7cbdb5f372463d42e6ed4ca9c443a52ce544472d8842a0576d830da5"}, - {file = "pydantic-1.10.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a00e63104346145389b8e8f500bc6a241e729feaf0559b88b8aa513dd2065481"}, - {file = "pydantic-1.10.18-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae6fa2008e1443c46b7b3a5eb03800121868d5ab6bc7cda20b5df3e133cde8b3"}, - {file = "pydantic-1.10.18-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9f463abafdc92635da4b38807f5b9972276be7c8c5121989768549fceb8d2588"}, - {file = "pydantic-1.10.18-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3445426da503c7e40baccefb2b2989a0c5ce6b163679dd75f55493b460f05a8f"}, - {file = "pydantic-1.10.18-cp311-cp311-win_amd64.whl", hash = "sha256:467a14ee2183bc9c902579bb2f04c3d3dac00eff52e252850509a562255b2a33"}, - {file = "pydantic-1.10.18-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:efbc8a7f9cb5fe26122acba1852d8dcd1e125e723727c59dcd244da7bdaa54f2"}, - {file = "pydantic-1.10.18-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:24a4a159d0f7a8e26bf6463b0d3d60871d6a52eac5bb6a07a7df85c806f4c048"}, - {file = "pydantic-1.10.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b74be007703547dc52e3c37344d130a7bfacca7df112a9e5ceeb840a9ce195c7"}, - {file = "pydantic-1.10.18-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fcb20d4cb355195c75000a49bb4a31d75e4295200df620f454bbc6bdf60ca890"}, - {file = "pydantic-1.10.18-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:46f379b8cb8a3585e3f61bf9ae7d606c70d133943f339d38b76e041ec234953f"}, - {file = "pydantic-1.10.18-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cbfbca662ed3729204090c4d09ee4beeecc1a7ecba5a159a94b5a4eb24e3759a"}, - {file = "pydantic-1.10.18-cp312-cp312-win_amd64.whl", hash = "sha256:c6d0a9f9eccaf7f438671a64acf654ef0d045466e63f9f68a579e2383b63f357"}, - {file = "pydantic-1.10.18-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3d5492dbf953d7d849751917e3b2433fb26010d977aa7a0765c37425a4026ff1"}, - {file = "pydantic-1.10.18-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe734914977eed33033b70bfc097e1baaffb589517863955430bf2e0846ac30f"}, - {file = "pydantic-1.10.18-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15fdbe568beaca9aacfccd5ceadfb5f1a235087a127e8af5e48df9d8a45ae85c"}, - {file = "pydantic-1.10.18-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c3e742f62198c9eb9201781fbebe64533a3bbf6a76a91b8d438d62b813079dbc"}, - {file = "pydantic-1.10.18-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:19a3bd00b9dafc2cd7250d94d5b578edf7a0bd7daf102617153ff9a8fa37871c"}, - {file = "pydantic-1.10.18-cp37-cp37m-win_amd64.whl", hash = "sha256:2ce3fcf75b2bae99aa31bd4968de0474ebe8c8258a0110903478bd83dfee4e3b"}, - {file = "pydantic-1.10.18-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:335a32d72c51a313b33fa3a9b0fe283503272ef6467910338e123f90925f0f03"}, - {file = 
"pydantic-1.10.18-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:34a3613c7edb8c6fa578e58e9abe3c0f5e7430e0fc34a65a415a1683b9c32d9a"}, - {file = "pydantic-1.10.18-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9ee4e6ca1d9616797fa2e9c0bfb8815912c7d67aca96f77428e316741082a1b"}, - {file = "pydantic-1.10.18-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23e8ec1ce4e57b4f441fc91e3c12adba023fedd06868445a5b5f1d48f0ab3682"}, - {file = "pydantic-1.10.18-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:44ae8a3e35a54d2e8fa88ed65e1b08967a9ef8c320819a969bfa09ce5528fafe"}, - {file = "pydantic-1.10.18-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5389eb3b48a72da28c6e061a247ab224381435256eb541e175798483368fdd3"}, - {file = "pydantic-1.10.18-cp38-cp38-win_amd64.whl", hash = "sha256:069b9c9fc645474d5ea3653788b544a9e0ccd3dca3ad8c900c4c6eac844b4620"}, - {file = "pydantic-1.10.18-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:80b982d42515632eb51f60fa1d217dfe0729f008e81a82d1544cc392e0a50ddf"}, - {file = "pydantic-1.10.18-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:aad8771ec8dbf9139b01b56f66386537c6fe4e76c8f7a47c10261b69ad25c2c9"}, - {file = "pydantic-1.10.18-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941a2eb0a1509bd7f31e355912eb33b698eb0051730b2eaf9e70e2e1589cae1d"}, - {file = "pydantic-1.10.18-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65f7361a09b07915a98efd17fdec23103307a54db2000bb92095457ca758d485"}, - {file = "pydantic-1.10.18-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6951f3f47cb5ca4da536ab161ac0163cab31417d20c54c6de5ddcab8bc813c3f"}, - {file = "pydantic-1.10.18-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a4c5eec138a9b52c67f664c7d51d4c7234c5ad65dd8aacd919fb47445a62c86"}, - {file = "pydantic-1.10.18-cp39-cp39-win_amd64.whl", hash = 
"sha256:49e26c51ca854286bffc22b69787a8d4063a62bf7d83dc21d44d2ff426108518"}, - {file = "pydantic-1.10.18-py3-none-any.whl", hash = "sha256:06a189b81ffc52746ec9c8c007f16e5167c8b0a696e1a726369327e3db7b2a82"}, - {file = "pydantic-1.10.18.tar.gz", hash = "sha256:baebdff1907d1d96a139c25136a9bb7d17e118f133a76a2ef3b845e831e3403a"}, + {file = "pydantic-1.10.19-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a415b9e95fa602b10808113967f72b2da8722061265d6af69268c111c254832d"}, + {file = "pydantic-1.10.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:11965f421f7eb026439d4eb7464e9182fe6d69c3d4d416e464a4485d1ba61ab6"}, + {file = "pydantic-1.10.19-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5bb81fcfc6d5bff62cd786cbd87480a11d23f16d5376ad2e057c02b3b44df96"}, + {file = "pydantic-1.10.19-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83ee8c9916689f8e6e7d90161e6663ac876be2efd32f61fdcfa3a15e87d4e413"}, + {file = "pydantic-1.10.19-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:0399094464ae7f28482de22383e667625e38e1516d6b213176df1acdd0c477ea"}, + {file = "pydantic-1.10.19-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8b2cf5e26da84f2d2dee3f60a3f1782adedcee785567a19b68d0af7e1534bd1f"}, + {file = "pydantic-1.10.19-cp310-cp310-win_amd64.whl", hash = "sha256:1fc8cc264afaf47ae6a9bcbd36c018d0c6b89293835d7fb0e5e1a95898062d59"}, + {file = "pydantic-1.10.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d7a8a1dd68bac29f08f0a3147de1885f4dccec35d4ea926e6e637fac03cdb4b3"}, + {file = "pydantic-1.10.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07d00ca5ef0de65dd274005433ce2bb623730271d495a7d190a91c19c5679d34"}, + {file = "pydantic-1.10.19-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad57004e5d73aee36f1e25e4e73a4bc853b473a1c30f652dc8d86b0a987ffce3"}, + {file = 
"pydantic-1.10.19-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dce355fe7ae53e3090f7f5fa242423c3a7b53260747aa398b4b3aaf8b25f41c3"}, + {file = "pydantic-1.10.19-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0d32227ea9a3bf537a2273fd2fdb6d64ab4d9b83acd9e4e09310a777baaabb98"}, + {file = "pydantic-1.10.19-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e351df83d1c9cffa53d4e779009a093be70f1d5c6bb7068584086f6a19042526"}, + {file = "pydantic-1.10.19-cp311-cp311-win_amd64.whl", hash = "sha256:d8d72553d2f3f57ce547de4fa7dc8e3859927784ab2c88343f1fc1360ff17a08"}, + {file = "pydantic-1.10.19-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d5b5b7c6bafaef90cbb7dafcb225b763edd71d9e22489647ee7df49d6d341890"}, + {file = "pydantic-1.10.19-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:570ad0aeaf98b5e33ff41af75aba2ef6604ee25ce0431ecd734a28e74a208555"}, + {file = "pydantic-1.10.19-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0890fbd7fec9e151c7512941243d830b2d6076d5df159a2030952d480ab80a4e"}, + {file = "pydantic-1.10.19-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec5c44e6e9eac5128a9bfd21610df3b8c6b17343285cc185105686888dc81206"}, + {file = "pydantic-1.10.19-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6eb56074b11a696e0b66c7181da682e88c00e5cebe6570af8013fcae5e63e186"}, + {file = "pydantic-1.10.19-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9d7d48fbc5289efd23982a0d68e973a1f37d49064ccd36d86de4543aff21e086"}, + {file = "pydantic-1.10.19-cp312-cp312-win_amd64.whl", hash = "sha256:fd34012691fbd4e67bdf4accb1f0682342101015b78327eaae3543583fcd451e"}, + {file = "pydantic-1.10.19-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4a5d5b877c7d3d9e17399571a8ab042081d22fe6904416a8b20f8af5909e6c8f"}, + {file = "pydantic-1.10.19-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9c46f58ef2df958ed2ea7437a8be0897d5efe9ee480818405338c7da88186fb3"}, + {file = "pydantic-1.10.19-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d8a38a44bb6a15810084316ed69c854a7c06e0c99c5429f1d664ad52cec353c"}, + {file = "pydantic-1.10.19-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a82746c6d6e91ca17e75f7f333ed41d70fce93af520a8437821dec3ee52dfb10"}, + {file = "pydantic-1.10.19-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:566bebdbe6bc0ac593fa0f67d62febbad9f8be5433f686dc56401ba4aab034e3"}, + {file = "pydantic-1.10.19-cp37-cp37m-win_amd64.whl", hash = "sha256:22a1794e01591884741be56c6fba157c4e99dcc9244beb5a87bd4aa54b84ea8b"}, + {file = "pydantic-1.10.19-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:076c49e24b73d346c45f9282d00dbfc16eef7ae27c970583d499f11110d9e5b0"}, + {file = "pydantic-1.10.19-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5d4320510682d5a6c88766b2a286d03b87bd3562bf8d78c73d63bab04b21e7b4"}, + {file = "pydantic-1.10.19-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e66aa0fa7f8aa9d0a620361834f6eb60d01d3e9cea23ca1a92cda99e6f61dac"}, + {file = "pydantic-1.10.19-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d216f8d0484d88ab72ab45d699ac669fe031275e3fa6553e3804e69485449fa0"}, + {file = "pydantic-1.10.19-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9f28a81978e936136c44e6a70c65bde7548d87f3807260f73aeffbf76fb94c2f"}, + {file = "pydantic-1.10.19-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d3449633c207ec3d2d672eedb3edbe753e29bd4e22d2e42a37a2c1406564c20f"}, + {file = "pydantic-1.10.19-cp38-cp38-win_amd64.whl", hash = "sha256:7ea24e8614f541d69ea72759ff635df0e612b7dc9d264d43f51364df310081a3"}, + {file = "pydantic-1.10.19-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:573254d844f3e64093f72fcd922561d9c5696821ff0900a0db989d8c06ab0c25"}, + {file = 
"pydantic-1.10.19-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ff09600cebe957ecbb4a27496fe34c1d449e7957ed20a202d5029a71a8af2e35"}, + {file = "pydantic-1.10.19-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4739c206bfb6bb2bdc78dcd40bfcebb2361add4ceac6d170e741bb914e9eff0f"}, + {file = "pydantic-1.10.19-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bfb5b378b78229119d66ced6adac2e933c67a0aa1d0a7adffbe432f3ec14ce4"}, + {file = "pydantic-1.10.19-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7f31742c95e3f9443b8c6fa07c119623e61d76603be9c0d390bcf7e888acabcb"}, + {file = "pydantic-1.10.19-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c6444368b651a14c2ce2fb22145e1496f7ab23cbdb978590d47c8d34a7bc0289"}, + {file = "pydantic-1.10.19-cp39-cp39-win_amd64.whl", hash = "sha256:945407f4d08cd12485757a281fca0e5b41408606228612f421aa4ea1b63a095d"}, + {file = "pydantic-1.10.19-py3-none-any.whl", hash = "sha256:2206a1752d9fac011e95ca83926a269fb0ef5536f7e053966d058316e24d929f"}, + {file = "pydantic-1.10.19.tar.gz", hash = "sha256:fea36c2065b7a1d28c6819cc2e93387b43dd5d3cf5a1e82d8132ee23f36d1f10"}, ] [package.dependencies] @@ -1016,23 +1016,23 @@ idna2008 = ["idna"] [[package]] name = "setuptools" -version = "75.3.0" +version = "75.4.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, - {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, + {file = "setuptools-75.4.0-py3-none-any.whl", hash = "sha256:b3c5d862f98500b06ffdf7cc4499b48c46c317d8d56cb30b5c8bce4d88f5c216"}, + {file = "setuptools-75.4.0.tar.gz", hash = 
"sha256:1dc484f5cf56fd3fe7216d7b8df820802e7246cfb534a1db2aa64f14fcb9cdcb"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.7.0)"] +core = ["importlib-metadata (>=6)", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", 
"pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"] [[package]] name = "six" @@ -1080,13 +1080,13 @@ files = [ [[package]] name = "tomli" -version = "2.0.2" +version = "2.1.0" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" files = [ - {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, - {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, + {file = "tomli-2.1.0-py3-none-any.whl", hash = "sha256:a5c57c3d1c56f5ccdf89f6523458f60ef716e210fc47c4cfb188c5ba473e0391"}, + {file = "tomli-2.1.0.tar.gz", hash = "sha256:3f646cae2aec94e17d04973e4249548320197cfabdf130015d023de4b74d8ab8"}, ] [[package]] diff --git a/tests/integration/dataframe/test_pandas_utility.py b/tests/integration/dataframe/test_pandas_utility.py index 73b76e85..2947a98a 100644 --- a/tests/integration/dataframe/test_pandas_utility.py +++ b/tests/integration/dataframe/test_pandas_utility.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import pandas as pd import pytest # type: ignore from nisystemlink.clients.core import ApiException @@ -62,7 +63,7 @@ def _create_table(df: pd.DataFrame, table_name: str, nullable_columns: bool) -> @pytest.mark.enterprise @pytest.mark.integration class TestPandasUtility: - def test_create_table_from_pandas_df( + def test__create_table_from_pandas_df__succeeds( self, client: DataFrameClient, sample_dataframe: pd.DataFrame, create_table ): table_name = "TestTable1" @@ -87,7 +88,7 @@ def test_create_table_from_pandas_df( assert index == "index" assert table_data.row_count == 0 - def test__create_table_with_missing_index__raises( + def test__create_table_from_pandas_df__missing_index_raises( self, client: 
DataFrameClient, create_table ): @@ -101,7 +102,7 @@ def test__create_table_with_missing_index__raises( ): create_table(df=frame, table_name="TestTable2", nullable_columns=True) - def test__append_data__works( + def test__append_pandas_df_to_table__succeeds( self, client: DataFrameClient, sample_dataframe, create_table ): @@ -116,16 +117,16 @@ def test__append_data__works( assert response.total_row_count == 3 def test__append_pandas_df_to_table__raises( - client: DataFrameClient, sample_dataframe, create_table + self, client: DataFrameClient, sample_dataframe, create_table ): id = create_table( df=sample_dataframe, table_name="TestTable3", nullable_columns=True ) with pytest.raises(ApiException, match="400 Bad Request"): - append_pandas_df_to_table(client, table_id=id, df=sample_dataframe) + append_pandas_df_to_table(client=client, table_id=id, df=sample_dataframe) - def test__query_table_data__sorts( + def test__query_table_data_as_pandas_df__sorted_query_succeeds( self, client: DataFrameClient, sample_dataframe, create_table ): table_name = "TestTable4" @@ -160,7 +161,9 @@ def test__query_table_data__sorts( assert response == expected_df - def test__query_decimated_data__works(self, client: DataFrameClient, create_table): + def test__query_decimated_table_data_as_pandas_df__succeeds( + self, client: DataFrameClient, create_table + ): table_name = "TestTable5" nullable_columns = True @@ -187,7 +190,9 @@ def test__query_decimated_data__works(self, client: DataFrameClient, create_tabl ), index=True, ) - expected_df = pd.DataFrame(data=[['1','2','3'],['7','8','9']], columns=["a","b","c"]) + expected_df = pd.DataFrame( + data=[["1", "2", "3"], ["7", "8", "9"]], columns=["a", "b", "c"] + ) expected_df.set_index("a", inplace=True) assert response.values == expected_df.values From 204ea7d6eab8a1029d711a09e22861e2102668b1 Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Tue, 26 Nov 2024 21:59:46 +0530 Subject: [PATCH 11/12] fix: review comments --- 
.../dataframe/pandas_dataframe_operations.py | 35 +++++++-- .../clients/dataframe/utilities/__init__.py | 1 + .../utilities/_pandas_dataframe_operations.py | 16 ++-- .../dataframe/test_pandas_utility.py | 76 +++++++++++-------- 4 files changed, 79 insertions(+), 49 deletions(-) diff --git a/examples/dataframe/pandas_dataframe_operations.py b/examples/dataframe/pandas_dataframe_operations.py index 3245b4f4..2b16b809 100644 --- a/examples/dataframe/pandas_dataframe_operations.py +++ b/examples/dataframe/pandas_dataframe_operations.py @@ -1,6 +1,7 @@ import pandas as pd from nisystemlink.clients.dataframe import DataFrameClient from nisystemlink.clients.dataframe.models import ( + ColumnOrderBy, DecimationMethod, DecimationOptions, QueryDecimatedDataRequest, @@ -9,6 +10,7 @@ from nisystemlink.clients.dataframe.utilities import ( append_pandas_df_to_table, create_table_from_pandas_df, + create_table_with_data_from_pandas_df, InvalidColumnTypeError, InvalidIndexError, query_decimated_table_data_as_pandas_df, @@ -22,18 +24,16 @@ df.set_index("a", inplace=True) print(df) -print(client.list_tables()) -client.list_tables() try: table_id = create_table_from_pandas_df( client, df, "Example Table", nullable_columns=False ) - print(f"Table created with ID: {table_id}") + print(f"\nTable created with ID: {table_id}") except (InvalidColumnTypeError, InvalidIndexError) as e: print(f"Error creating table: {e}") append_pandas_df_to_table(client, table_id, df) -print("Data appended to the table.") +print("\nData appended to the table.") request = QueryDecimatedDataRequest( decimation=DecimationOptions( @@ -47,14 +47,33 @@ queried_decimated_df = query_decimated_table_data_as_pandas_df( client, table_id, query=request, index=True ) -print("Queried decimated data as pandas dataframe:") +print("\nQueried decimated data as pandas dataframe:") print(queried_decimated_df.columns) - -query = QueryTableDataRequest() +query = QueryTableDataRequest( + columns=["b", "c"], 
order_by=[ColumnOrderBy(column="b", descending=True)] +) queried_df = query_table_data_as_pandas_df( client=client, table_id=table_id, query=query, index=True ) -print("Queried table data as pandas dataframe:") +print("\nQueried table data as pandas dataframe:") print(queried_df) client.delete_table(table_id) +print(f"\nTable {table_id} deleted successfully.") + +try: + table_id = create_table_with_data_from_pandas_df(client, df, "Example Table") + print(f"\nTable created with ID: {table_id}") +except (InvalidColumnTypeError, InvalidIndexError) as e: + print(f"Error creating table: {e}") + +query = QueryTableDataRequest() +table_data = query_table_data_as_pandas_df( + client=client, table_id=table_id, query=query, index=True +) + +print("\nTable data as pandas DataFrame:") +print(table_data) + +client.delete_table(table_id) +print(f"\nTable {table_id} deleted successfully.\n") diff --git a/nisystemlink/clients/dataframe/utilities/__init__.py b/nisystemlink/clients/dataframe/utilities/__init__.py index bcf41885..367e7a04 100644 --- a/nisystemlink/clients/dataframe/utilities/__init__.py +++ b/nisystemlink/clients/dataframe/utilities/__init__.py @@ -1,6 +1,7 @@ from ._pandas_exception import InvalidColumnTypeError, InvalidIndexError from ._pandas_dataframe_operations import ( create_table_from_pandas_df, + create_table_with_data_from_pandas_df, append_pandas_df_to_table, query_decimated_table_data_as_pandas_df, query_table_data_as_pandas_df, diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py index 26797621..d3e93787 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py @@ -47,7 +47,7 @@ def append_pandas_df_to_table( client: DataFrameClient, table_id: str, df: pd.DataFrame, - end_of_data: Optional[bool] = None, + end_of_data: Optional[bool] = False, ) 
-> None: """Append `df` to table. @@ -70,34 +70,28 @@ def create_table_with_data_from_pandas_df( client: DataFrameClient, df: pd.DataFrame, table_name: str, - nullable_columns: bool, batch_size: int = 1000, - end_of_data: Optional[bool] = None, ) -> str: - """Create a table and upload data from a pandas DataFrame. - - This function creates the table, uploads the data (with batching for large data), - and closes the upload process in one seamless call. + """Create a table and upload data from a pandas DataFrame with batching. Args: client (DataFrameClient): Instance of DataFrameClient. df (pd.DataFrame): Pandas DataFrame with data to upload. table_name (str): Name of the table to create. - nullable_columns (bool): Make the columns nullable. Nullable columns can contain `null` values. batch_size (Optional[int]): Number of rows to batch in each upload. Default is 1000. Returns: str: ID of the created table. """ table_id = create_table_from_pandas_df( - client=client, df=df, table_name=table_name, nullable_columns=nullable_columns + client=client, df=df, table_name=table_name, nullable_columns=False ) - num_rows = len(df) + num_rows = df.shape[0] for start_row in range(0, num_rows, batch_size): end_row = min(start_row + batch_size, num_rows) batch_df = df.iloc[start_row:end_row] - append_pandas_df_to_table(client, table_id, batch_df, end_of_data) + append_pandas_df_to_table(client, table_id, batch_df, end_of_data=(end_row == num_rows)) return table_id diff --git a/tests/integration/dataframe/test_pandas_utility.py b/tests/integration/dataframe/test_pandas_utility.py index 2947a98a..06cb0c1e 100644 --- a/tests/integration/dataframe/test_pandas_utility.py +++ b/tests/integration/dataframe/test_pandas_utility.py @@ -91,11 +91,13 @@ def test__create_table_from_pandas_df__succeeds( def test__create_table_from_pandas_df__missing_index_raises( self, client: DataFrameClient, create_table ): - - frame = pd.DataFrame( - columns=["index", "value", "ignore_me"], - data=[["1", 
"3.3", "True"], ["2", "6", "False"], ["3", "1.1", "True"]], - ) + columns = ["index", "value", "ignore_me"] + data = [ + ["1", "3.3", "True"], + ["2", "6", "False"], + ["3", "1.1", "True"], + ] + frame = pd.DataFrame(columns=columns, data=data) with pytest.raises( InvalidIndexError, match="Data frame must contain one index." @@ -123,8 +125,13 @@ def test__append_pandas_df_to_table__raises( df=sample_dataframe, table_name="TestTable3", nullable_columns=True ) + frame = pd.DataFrame( + columns=["index", "non_existent_column"], + data=[["1", "2"], ["2", "2"], ["3", "3"]], + ) + with pytest.raises(ApiException, match="400 Bad Request"): - append_pandas_df_to_table(client=client, table_id=id, df=sample_dataframe) + append_pandas_df_to_table(client=client, table_id=id, df=frame) def test__query_table_data_as_pandas_df__sorted_query_succeeds( self, client: DataFrameClient, sample_dataframe, create_table @@ -134,32 +141,33 @@ def test__query_table_data_as_pandas_df__sorted_query_succeeds( id = create_table( sample_dataframe, table_name, nullable_columns=nullable_columns ) - - frame = pd.DataFrame( - data=[[1, "2.5", "True"], [2, "1.5", "False"], [3, "2.5", "True"]], - columns=["index", "value", "ignore_me"], - ) - - append_pandas_df_to_table(client, table_id=id, df=frame) + append_pandas_df_to_table(client, table_id=id, df=sample_dataframe) + client.get_table_metadata(id=id) response = query_table_data_as_pandas_df( client, table_id=id, query=QueryTableDataRequest( - order_by=[ - ColumnOrderBy(column="value", descending=True), - ColumnOrderBy(column="ignore_me"), - ] + columns=["value", "ignore_me"], + order_by=[ColumnOrderBy(column="value", descending=True)], ), index=True, ) - expected_df = pd.DataFrame( - data=[[2, "1.5", "False"], [3, "2.5", "True"], [1, "2.5", "True"]], - columns=["index", "value", "ignore_me"], - ) + + data = [ + [2, "6", "False"], + [1, "3.3", "True"], + [3, "1.1", "True"], + ] + columns = ["index", "value", "ignore_me"] + expected_df = 
pd.DataFrame(columns=columns, data=data) expected_df.set_index("index", inplace=True) - assert response == expected_df + assert ( + (response.reset_index(drop=True) == expected_df.reset_index(drop=True)) + .all() + .all() + ) def test__query_decimated_table_data_as_pandas_df__succeeds( self, client: DataFrameClient, create_table @@ -167,17 +175,22 @@ def test__query_decimated_table_data_as_pandas_df__succeeds( table_name = "TestTable5" nullable_columns = True - frame: pd.DataFrame = pd.DataFrame( - data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] - ) + data = [ + [1, 2, 3], + [4, 5, 6], + [7, 8, 9], + ] + columns = ["a", "b", "c"] + frame = pd.DataFrame(data=data, columns=columns) frame.set_index("a", inplace=True) + id = create_table( df=frame, table_name=table_name, nullable_columns=nullable_columns ) append_pandas_df_to_table(client, table_id=id, df=frame) - response = query_decimated_table_data_as_pandas_df( + response: pd.DataFrame = query_decimated_table_data_as_pandas_df( client, table_id=id, query=QueryDecimatedDataRequest( @@ -190,9 +203,12 @@ def test__query_decimated_table_data_as_pandas_df__succeeds( ), index=True, ) - expected_df = pd.DataFrame( - data=[["1", "2", "3"], ["7", "8", "9"]], columns=["a", "b", "c"] - ) + data = [ + ["1", "2", "3"], + ["7", "8", "9"], + ] + columns = ["a", "b", "c"] + expected_df = pd.DataFrame(data=data, columns=columns) expected_df.set_index("a", inplace=True) - assert response.values == expected_df.values + assert (response.values == expected_df.values).all() From 2706f58b94495b2c0a9fe096185f11819ea5ff3a Mon Sep 17 00:00:00 2001 From: Ancy Augustin Date: Tue, 26 Nov 2024 22:13:06 +0530 Subject: [PATCH 12/12] fix: lint --- .../utilities/_pandas_dataframe_operations.py | 4 +++- tests/integration/dataframe/test_pandas_utility.py | 14 ++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py 
b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py index d3e93787..69e09b6e 100644 --- a/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py +++ b/nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py @@ -91,7 +91,9 @@ def create_table_with_data_from_pandas_df( for start_row in range(0, num_rows, batch_size): end_row = min(start_row + batch_size, num_rows) batch_df = df.iloc[start_row:end_row] - append_pandas_df_to_table(client, table_id, batch_df, end_of_data=(end_row == num_rows)) + append_pandas_df_to_table( + client, table_id, batch_df, end_of_data=(end_row == num_rows) + ) return table_id diff --git a/tests/integration/dataframe/test_pandas_utility.py b/tests/integration/dataframe/test_pandas_utility.py index 06cb0c1e..48b8aff9 100644 --- a/tests/integration/dataframe/test_pandas_utility.py +++ b/tests/integration/dataframe/test_pandas_utility.py @@ -203,12 +203,14 @@ def test__query_decimated_table_data_as_pandas_df__succeeds( ), index=True, ) - data = [ - ["1", "2", "3"], - ["7", "8", "9"], - ] - columns = ["a", "b", "c"] - expected_df = pd.DataFrame(data=data, columns=columns) + + expected_df = pd.DataFrame( + data=[ + ["1", "2", "3"], + ["7", "8", "9"], + ], + columns=["a", "b", "c"], + ) expected_df.set_index("a", inplace=True) assert (response.values == expected_df.values).all()