From cbfbe3fdcac00d9a432aff73bea72383e66824e6 Mon Sep 17 00:00:00 2001
From: Andrew Atamanyuk <ataman.andrew87@gmail.com>
Date: Fri, 28 Feb 2020 14:58:39 +0300
Subject: [PATCH] numeric feature processing

---
 tensorflow_data_validation/api/validation_api.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tensorflow_data_validation/api/validation_api.py b/tensorflow_data_validation/api/validation_api.py
index 4d4df08a..d7855b6c 100644
--- a/tensorflow_data_validation/api/validation_api.py
+++ b/tensorflow_data_validation/api/validation_api.py
@@ -22,6 +22,7 @@
 import logging
 import apache_beam as beam
 import pyarrow as pa
+import pandas as pd
 import tensorflow as tf
 from tensorflow_data_validation import constants
 from tensorflow_data_validation import types
@@ -52,6 +53,17 @@
     anomalies_pb2.AnomalyInfo.NO_DATA_IN_SPAN,
 ])
 
+def preprocess_numerical_to_categorical_by_own_quantiles(
+    dataframe: pd.DataFrame,
+):
+    # TODO: refactor implementation from private project
+    return dataframe
+
+def preprocess_numerical_to_categorical_by_training_quantiles(
+    dataframe: pd.DataFrame,
+):
+    # TODO: refactor implementation from private project
+    return dataframe
 
 def infer_schema(
     statistics: statistics_pb2.DatasetFeatureStatisticsList,