Skip to content

Commit

Permalink
style: adjust gwalker_props.py code style
Browse files Browse the repository at this point in the history
  • Loading branch information
longxiaofei committed Jun 15, 2023
1 parent a3d1adb commit f4a3eeb
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 79 deletions.
4 changes: 2 additions & 2 deletions pygwalker/gwalker.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def walk(df: "pl.DataFrame | pd.DataFrame", gid: tp.Union[int, str] = None, *,
import time
import json
from .utils.render import DataFrameEncoder
from .utils.gwalker_props import getPropGetter
from .utils.gwalker_props import get_prop_getter

from .base import __hash__, rand_str
def rand_slot_id():
Expand Down Expand Up @@ -161,7 +161,7 @@ def send_msg(msg):
# static output is truncated.
time.sleep(0.1)
chunk = 1 << 14
prop_getter = getPropGetter(df)
prop_getter = get_prop_getter(df)
df = prop_getter.escape_fname(df, env=env, fieldSpecs=fieldSpecs, **kwargs)
records = prop_getter.to_records(df)
# matrix = prop_getter.to_matrix(df)
Expand Down
166 changes: 90 additions & 76 deletions pygwalker/utils/gwalker_props.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import importlib
from typing import NamedTuple, NewType, TYPE_CHECKING, TypeVar, Generic, Dict, List, Any, Optional
from typing_extensions import Literal
from types import ModuleType
import sys
import json

from pygwalker.base import tp
from ..base import *
from .fname_encodings import fname_decode, fname_encode
from pygwalker.base import BYTE_LIMIT

from typing import TYPE_CHECKING

class FieldSpec(tp.NamedTuple):
class FieldSpec(NamedTuple):
"""Field specification.
Args:
Expand All @@ -17,93 +19,100 @@ class FieldSpec(tp.NamedTuple):
semanticType: Literal['?', 'nominal', 'ordinal', 'temporal', 'quantitative'] = '?'
analyticType: Literal['?', 'dimension', 'measure'] = '?'
display_as: str = None
default_field_spec = FieldSpec()
tp.FieldSpec = tp.NewType('FieldSpec', FieldSpec)


default_field_spec = FieldSpec()

# Concrete DataFrame classes used as constraints for the ``DataFrame`` TypeVar.
# At runtime ``TYPE_CHECKING`` is False, so the list stays empty and neither
# pandas nor polars is imported by this module.
dataframe_types = []
if TYPE_CHECKING:
    try:
        import pandas as pd
    except ModuleNotFoundError:
        pass
    else:
        dataframe_types.append(pd.DataFrame)

    try:
        import polars as pl
    except ModuleNotFoundError:
        pass
    else:
        dataframe_types.append(pl.DataFrame)


DataFrame = tp.TypeVar("DataFrame", *dataframe_types)
DataFrame = TypeVar("DataFrame", *dataframe_types)
"""
DataFrame can be either pandas.DataFrame or polars.DataFrame
"""
class DataFramePropGetter(tp.Generic[DataFrame]):
Series = tp.TypeVar("Series")


class DataFramePropGetter(Generic[DataFrame]):
"""DataFrame property getter"""
Series = TypeVar("Series")

@classmethod
def infer_semantic(cls, df: DataFrame, **kwargs):
raise NotImplementedError
pass

@classmethod
def infer_analytic(cls, df: DataFrame, **kwargs):
raise NotImplementedError
pass

@classmethod
def to_records(cls, df: DataFrame, **kwargs) -> tp.List[tp.Dict[str, tp.Any]]:
def to_records(cls, df: DataFrame, **kwargs) -> List[Dict[str, Any]]:
"""Convert DataFrame to a list of records"""
raise NotImplementedError
pass


@classmethod
def to_matrix(cls, df: DataFrame, **kwargs) -> tp.List[tp.Dict[str, tp.Any]]:
def to_matrix(cls, df: DataFrame, **kwargs) -> List[Dict[str, Any]]:
raise NotImplementedError

@classmethod
def escape_fname(cls, df: DataFrame, **kwargs) -> DataFrame:
"""Encode fname to prevent special characters in field names from causing errors"""
raise NotImplementedError
pass


@classmethod
def series(cls, df: DataFrame, i: int, col: str) -> Series:
return df[col]

@classmethod
def infer_prop(cls, df: DataFrame, col: str, i=None, fieldSpecs: tp.Dict[str, tp.FieldSpec]={}) -> tp.Dict:
def infer_prop(
cls, df: DataFrame, col: str, i=None, field_specs: Optional[Dict[str, FieldSpec]] = None
) -> Dict[str, str]:
"""get IMutField
Returns:
(IMutField, tp.Dict)
(IMutField, Dict)
"""
if field_specs is None:
field_specs = {}

s: cls.Series = cls.series(df, i, col)
orig_fname = cls.decode_fname(s)
fieldSpec = fieldSpecs.get(orig_fname, default_field_spec)
semanticType = cls.infer_semantic(s) if fieldSpec.semanticType == '?' else fieldSpec.semanticType
field_spec = field_specs.get(orig_fname, default_field_spec)
semantic_type = cls.infer_semantic(s) if field_spec.semanticType == '?' else field_spec.semanticType
# 'quantitative' | 'nominal' | 'ordinal' | 'temporal';
analyticType = cls.infer_analytic(s) if fieldSpec.analyticType == '?' else fieldSpec.analyticType
analytic_type = cls.infer_analytic(s) if field_spec.analyticType == '?' else field_spec.analyticType
# 'measure' | 'dimension';
fname = orig_fname if fieldSpec.display_as is None else fieldSpec.display_as
fname = orig_fname if field_spec.display_as is None else field_spec.display_as
return {
'fid': col,
'name': fname,
'semanticType': semanticType,
'analyticType': analyticType,
'semanticType': semantic_type,
'analyticType': analytic_type,
}

@classmethod
def raw_fields(cls, df: DataFrame, **kwargs):
fieldSpecs = kwargs.get('fieldSpecs', {})
field_specs = kwargs.get('fieldSpecs', {})
return [
cls.infer_prop(df, col, i, fieldSpecs)
cls.infer_prop(df, col, i, field_specs)
for i, col in enumerate(df.columns)
]

@classmethod
def limited_sample(cls, df: DataFrame) -> DataFrame:
"""Return the max sample that can be sent to GraphicWalker"""
raise NotImplementedError
pass


@classmethod
def get_props(cls, df: DataFrame, **kwargs):
"""Remove data volume restrictions for non-Jupyter environments.
Expand All @@ -123,29 +132,34 @@ def get_props(cls, df: DataFrame, **kwargs):
**kwargs,
}
return props

@classmethod
def decode_fname(cls, s: Series, **kwargs) -> str:
"""Get safe field name from series."""
raise NotImplementedError

class PandasDataFramePropGetter(DataFramePropGetter[DataFrame]):
    """Module-level placeholder for the pandas property getter.

    The working implementation is the class of the same name created inside
    ``_build_pandas_prop_getter``.
    """


class PolarsDataFramePropGetter(DataFramePropGetter[DataFrame]):
    """Module-level placeholder for the polars property getter.

    The working implementation is the class of the same name created inside
    ``_build_polars_prop_getter``.
    """


__classname2method = {}
__supported_modules = ['pandas', 'polars']

import sys

def buildPandasPropGetter():
import pandas as pd
def _build_pandas_prop_getter(pd: ModuleType):

class PandasDataFramePropGetter(DataFramePropGetter[pd.DataFrame]):
@classmethod
def limited_sample(cls, df: DataFrame) -> DataFrame:
if len(df)*2 > BYTE_LIMIT:
df = df.iloc[:BYTE_LIMIT//2]
return df

@classmethod
def infer_semantic(cls, s: pd.Series):
v_cnt = len(s.value_counts())
Expand All @@ -154,112 +168,112 @@ def infer_semantic(cls, s: pd.Series):
'temporal' if kind in 'M' else \
'nominal' if kind in 'bOSUV' or v_cnt <= 2 else \
'ordinal'

@classmethod
def infer_analytic(cls, s: pd.Series):
kind = s.dtype.kind
return 'measure' if \
kind in 'fcm' or (kind in 'iu' and len(s.value_counts()) > 16) \
else 'dimension'

@classmethod
def series(cls, df: pd.DataFrame, i: int, col: str):
return df.iloc[:,i]
return df.iloc[:, i]

@classmethod
def to_records(cls, df: pd.DataFrame):
df = df.replace({float('nan'): None})
return df.to_dict(orient='records')

@classmethod
def to_matrix(cls, df: pd.DataFrame, **kwargs) -> tp.List[tp.List[tp.Any]]:
def to_matrix(cls, df: pd.DataFrame, **kwargs) -> List[List[Any]]:
df = df.replace({float('nan'): None})
return df.to_dict(orient='tight')

@classmethod
def escape_fname(cls, df: pd.DataFrame, **kwargs):
df = df.reset_index()
df.columns = [f"{col}_{i}" for i, col in enumerate(df.columns)]
df = df.rename(fname_encode, axis='columns')
return df

@classmethod
def decode_fname(cls, s: pd.Series, **kwargs):
fname = fname_decode(s.name)
fname = json.dumps(fname, ensure_ascii=False)[1:-1]
return fname

return PandasDataFramePropGetter

def _build_polars_prop_getter(pl: ModuleType):
    """Build the polars-backed ``DataFramePropGetter`` implementation.

    The class is created inside this function so that this module does not
    have to import polars itself; the caller passes the module in.

    Args:
        pl: The already-imported ``polars`` module.

    Returns:
        The ``PolarsDataFramePropGetter`` class (not an instance).
    """
    class PolarsDataFramePropGetter(DataFramePropGetter[pl.DataFrame]):
        """Property getter for ``polars.DataFrame`` objects."""
        Series = pl.Series

        @classmethod
        def limited_sample(cls, df: DataFrame) -> DataFrame:
            """Return ``df`` truncated to at most ``BYTE_LIMIT // 2`` rows."""
            if len(df)*2 > BYTE_LIMIT:
                df = df.head(BYTE_LIMIT//2)
            return df

        @classmethod
        def infer_semantic(cls, s: pl.Series):
            """Infer the semantic type of a column from its dtype and cardinality."""
            v_cnt = len(s.value_counts())
            kind = s.dtype
            if kind in pl.NUMERIC_DTYPES and v_cnt > 16:
                return 'quantitative'
            if kind in pl.TEMPORAL_DTYPES:
                return 'temporal'
            if kind in [pl.Boolean, pl.Object, pl.Utf8, pl.Categorical, pl.Struct, pl.List] or v_cnt <= 2:
                return 'nominal'
            return 'ordinal'

        @classmethod
        def infer_analytic(cls, s: pl.Series):
            """Infer whether a column is a 'measure' or a 'dimension'."""
            kind = s.dtype
            if kind in pl.FLOAT_DTYPES | pl.DURATION_DTYPES:
                return 'measure'
            # Integer columns count as measures only when they have many distinct values.
            if kind in pl.INTEGER_DTYPES and len(s.value_counts()) > 16:
                return 'measure'
            return 'dimension'

        @classmethod
        def to_records(cls, df: pl.DataFrame, **kwargs) -> List[Dict[str, Any]]:
            """Convert ``df`` to a list of row dicts, with NaN replaced by None."""
            df = df.fill_nan(None)
            return df.to_dicts()

        @classmethod
        def to_matrix(cls, df: pl.DataFrame, **kwargs) -> Dict[str, Any]:
            """Convert ``df`` to ``{'columns': [...], 'data': [[...], ...]}``.

            NaN values are replaced by None. An empty DataFrame yields its
            column names with an empty ``data`` list.
            """
            df = df.fill_nan(None)
            dicts = df.to_dicts()
            # Take the column names from the frame itself rather than from the
            # first row, so that a DataFrame with zero rows does not raise
            # IndexError.
            return {'columns': list(df.columns), 'data': [list(d.values()) for d in dicts]}

        @classmethod
        def escape_fname(cls, df: pl.DataFrame, **kwargs) -> pl.DataFrame:
            """Rename every column to its ``fname_encode``-d form."""
            df = df.rename({i: fname_encode(i) for i in df.columns})
            return df

        @classmethod
        def decode_fname(cls, s: pl.Series, **kwargs):
            """Return the decoded, JSON-escaped original name of series ``s``."""
            import json
            fname = fname_decode(s.name)
            # json.dumps yields a quoted string; [1:-1] strips the surrounding quotes.
            fname = json.dumps(fname, ensure_ascii=False)[1:-1]
            return fname

    return PolarsDataFramePropGetter

def get_prop_getter(df: DataFrame) -> DataFramePropGetter:
    """Return the ``DataFramePropGetter`` class that handles ``df``.

    Only libraries already present in ``sys.modules`` are considered, so this
    function never triggers a first-time import of pandas or polars. Built
    getter classes are cached in ``__classname2method`` under the library's
    base ``DataFrame`` class.

    Args:
        df: A pandas or polars DataFrame (or a subclass of either).

    Returns:
        The matching getter class, or the base ``DataFramePropGetter`` when
        ``df`` is not a recognised DataFrame type.
    """
    cached = __classname2method.get(type(df))
    if cached is not None:
        return cached

    if 'pandas' in sys.modules:
        import pandas as pd
        if isinstance(df, pd.DataFrame):
            # The cache key is the base class, so a DataFrame subclass misses
            # the exact-type lookup above; reuse the existing getter instead
            # of building a new class on every call.
            if pd.DataFrame not in __classname2method:
                __classname2method[pd.DataFrame] = _build_pandas_prop_getter(pd)
            return __classname2method[pd.DataFrame]

    if 'polars' in sys.modules:
        import polars as pl
        if isinstance(df, pl.DataFrame):
            if pl.DataFrame not in __classname2method:
                __classname2method[pl.DataFrame] = _build_polars_prop_getter(pl)
            return __classname2method[pl.DataFrame]
    return DataFramePropGetter


def get_props(df: DataFrame, **kwargs):
    """Return the props for ``df`` using the getter selected for its type.

    All keyword arguments are forwarded unchanged to the getter's
    ``get_props`` classmethod.
    """
    getter = get_prop_getter(df)
    return getter.get_props(df, **kwargs)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ dependencies = [
"jinja2",
"ipython",
"astor",
"typing_extensions; python_version <= '3.7'",
"typing_extensions",
"aiohttp>=3.0.0; platform_machine != 'wasm32'",
]
[project.urls]
Expand Down

0 comments on commit f4a3eeb

Please sign in to comment.