|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
3 | 3 | import json
|
4 |
| -import pandas as pd |
5 |
| -import numpy as np |
6 | 4 | from typing import Any as TypingAny
|
7 | 5 |
|
8 | 6 | import pandas as pd
|
|
21 | 19 | StringField,
|
22 | 20 | )
|
23 | 21 | from palimpzest.utils.hash_helpers import hash_for_temp_schema
|
24 |
| -from palimpzest.constants import DERIVED_SCHEMA_PREFIX, FROM_DF_PREFIX |
| 22 | + |
25 | 23 |
|
26 | 24 | class SchemaMetaclass(type):
|
27 | 25 | """
|
@@ -240,7 +238,7 @@ def project(cls, project_cols: list[str]) -> Schema:
|
240 | 238 | return type(new_schema_name, (Schema,), attributes)
|
241 | 239 |
|
242 | 240 | @staticmethod
|
243 |
| - def from_df(df: pd.DataFrame) -> "Schema": |
| 241 | + def from_df(df: pd.DataFrame) -> Schema: |
244 | 242 | # Create a unique schema name based on columns
|
245 | 243 | schema_name = f"{DERIVED_SCHEMA_PREFIX}{hash_for_temp_schema(str(tuple(sorted(df.columns))))}"
|
246 | 244 |
|
@@ -278,39 +276,6 @@ def class_name(cls) -> str:
|
278 | 276 | """Return the name of this class"""
|
279 | 277 | return cls.__name__
|
280 | 278 |
|
281 |
| - @staticmethod |
282 |
| - def from_df(df: pd.DataFrame) -> Schema: |
283 |
| - # Create a unique schema name based on columns |
284 |
| - schema_name = f"{DERIVED_SCHEMA_PREFIX}{hash_for_temp_schema(str(tuple(sorted(df.columns))))}" |
285 |
| - |
286 |
| - # consider to save to temp file and load from there |
287 |
| - if schema_name in globals(): |
288 |
| - return globals()[schema_name] |
289 |
| - |
290 |
| - # NOTE: we will not be able to infer more complicated types like ImageFilepathField |
291 |
| - # without some input from the user |
292 |
| - # construct attributes for schema (i.e. its fields and metadata) |
293 |
| - desc = "Schema derived from DataFrame" |
294 |
| - attributes = {"_desc": desc, "__doc__": desc, "__module__": Schema.__module__} |
295 |
| - for col, dtype in zip(df.columns, df.dtypes): |
296 |
| - if dtype == "object": |
297 |
| - attributes[col] = StringField(desc=col) |
298 |
| - elif dtype == "bool": |
299 |
| - attributes[col] = BooleanField(desc=col) |
300 |
| - elif dtype == "int64": |
301 |
| - attributes[col] = IntField(desc=col) |
302 |
| - elif dtype == "float64": |
303 |
| - attributes[col] = FloatField(desc=col) |
304 |
| - else: |
305 |
| - attributes[col] = Field(desc=col) |
306 |
| - |
307 |
| - # Create new schema only if it doesn't exist |
308 |
| - new_schema = type(schema_name, (Schema,), attributes) |
309 |
| - |
310 |
| - # Store the schema class globally |
311 |
| - globals()[schema_name] = new_schema |
312 |
| - return new_schema |
313 |
| - |
314 | 279 | ###################################################################################
|
315 | 280 | # "Core" useful Schemas. These are Schemas that almost everyone will need.
|
316 | 281 | # File, TextFile, Image, PDF, etc.
|
|
0 commit comments