databrickslabs
diff --git a/‎dbldatagen/__init__.py
+3-1 b/‎dbldatagen/__init__.py
+3-1
diff --git a/‎dbldatagen/column_generation_spec.py
+19-3 b/‎dbldatagen/column_generation_spec.py
+19-3
diff --git a/‎dbldatagen/constraints/chained_relation.py
+13 b/‎dbldatagen/constraints/chained_relation.py
+13
diff --git a/‎dbldatagen/constraints/constraint.py
+2-1 b/‎dbldatagen/constraints/constraint.py
+2-1
diff --git a/‎dbldatagen/constraints/literal_range_constraint.py
+19 b/‎dbldatagen/constraints/literal_range_constraint.py
+19
diff --git a/‎dbldatagen/constraints/literal_relation_constraint.py
+18 b/‎dbldatagen/constraints/literal_relation_constraint.py
+18
diff --git a/‎dbldatagen/constraints/negative_values.py
+13 b/‎dbldatagen/constraints/negative_values.py
+13
diff --git a/‎dbldatagen/constraints/positive_values.py
+13 b/‎dbldatagen/constraints/positive_values.py
+13
diff --git a/‎dbldatagen/constraints/ranged_values_constraint.py
+19 b/‎dbldatagen/constraints/ranged_values_constraint.py
+19
diff --git a/‎dbldatagen/constraints/sql_expr.py
+13 b/‎dbldatagen/constraints/sql_expr.py
+13
diff --git a/‎dbldatagen/constraints/unique_combinations.py
+13 b/‎dbldatagen/constraints/unique_combinations.py
+13
@@ -34,6 +34,8 @@
 from ._version import __version__
 from .column_generation_spec import ColumnGenerationSpec
 from .column_spec_options import ColumnSpecOptions
+from .constraints import Constraint, ChainedRelation, LiteralRange, LiteralRelation, NegativeValues, PositiveValues, \
+    RangedValues, SqlExpr, UniqueCombinations
 from .data_analyzer import DataAnalyzer
 from .schema_parser import SchemaParser
 from .daterange import DateRange
@@ -49,7 +51,7 @@
 __all__ = ["data_generator", "data_analyzer", "schema_parser", "daterange", "nrange",
            "column_generation_spec", "utils", "function_builder",
            "spark_singleton", "text_generators", "datarange", "datagen_constants",
-           "text_generator_plugins", "html_utils", "datasets_object"
+           "text_generator_plugins", "html_utils", "datasets_object", "constraints"
            ]
 
 
 
@@ -25,6 +25,7 @@
 from .daterange import DateRange
 from .distributions import Normal, DataDistribution
 from .nrange import NRange
+from .serialization import SerializableToDict
 from .text_generators import TemplateGenerator
 from .utils import ensure, coalesce_values
 from .schema_parser import SchemaParser
@@ -40,7 +41,7 @@
                                RAW_VALUES_COMPUTE_METHOD]
 
 
-class ColumnGenerationSpec(object):
+class ColumnGenerationSpec(SerializableToDict):
     """ Column generation spec object - specifies how column is to be generated
 
     Each column to be output will have a corresponding ColumnGenerationSpec object.
@@ -119,7 +120,7 @@ def __init__(self, name, colType=None, minValue=0, maxValue=None, step=1, prefix
             if EXPR_OPTION not in kwargs:
                 raise ValueError("Column generation spec must have `expr` attribute specified if datatype is inferred")
 
-        elif type(colType) == str:
+        elif isinstance(colType, str):
             colType = SchemaParser.columnTypeFromString(colType)
 
         assert isinstance(colType, DataType), f"colType `{colType}` is not instance of DataType"
@@ -299,6 +300,21 @@ def __init__(self, name, colType=None, minValue=0, maxValue=None, step=1, prefix
         # set up the temporary columns needed for data generation
         self._setupTemporaryColumns()
 
+    def _toInitializationDict(self):
+        """ Describe this column generation spec as a dictionary of its options.
+
+            Works on a copy of the stored column spec options (`self._csOptions.options.copy()`).
+            The `name` option is emitted as `colName` and the `type` option as `colType` (in its
+            `simpleString()` form), falling back to `self.name` / `self.datatype` when the option
+            is absent. A `kind` entry holds the class name. Options whose value is None are left
+            out, and values that are `SerializableToDict` are replaced by their own dictionaries.
+
+            NOTE(review): the emitted keys `colName` / `colType` presumably match the arguments of
+            whatever rebuilds the column - confirm against the consumer of this dictionary.
+
+            :return: Python dictionary representation of the object
+        """
+        columnOptions = self._csOptions.options.copy()
+        columnOptions["colName"] = columnOptions.pop("name", self.name)
+        columnType = columnOptions.pop("type", self.datatype)
+        columnOptions["colType"] = columnType.simpleString()
+        columnOptions["kind"] = self.__class__.__name__
+
+        initArgs = {}
+        for optionName, optionValue in columnOptions.items():
+            if optionValue is None:
+                continue  # unset options are omitted rather than emitted as None
+            if isinstance(optionValue, SerializableToDict):
+                optionValue = optionValue._toInitializationDict()
+            initArgs[optionName] = optionValue
+        return initArgs
+
     def _temporaryRename(self, tmpName):
         """ Create enter / exit object to support temporary renaming of column spec
 
@@ -451,7 +467,7 @@ def setBaseColumnDatatypes(self, columnDatatypes):
         assert type(columnDatatypes) is list, " `column_datatypes` parameter must be list"
         ensure(len(columnDatatypes) == len(self.baseColumns),
                "number of base column datatypes must match number of  base columns")
-        self._baseColumnDatatypes = [].append(columnDatatypes)
+        self._baseColumnDatatypes = columnDatatypes.copy()
 
     def _setupTemporaryColumns(self):
         """ Set up any temporary columns needed for test data generation.
 
@@ -8,6 +8,7 @@
 from pyspark.sql import DataFrame
 import pyspark.sql.functions as F
 from .constraint import Constraint, NoPrepareTransformMixin
+from ..serialization import SerializableToDict
 
 
 class ChainedRelation(NoPrepareTransformMixin, Constraint):
@@ -38,6 +39,18 @@ def __init__(self, columns, relation):
         if not isinstance(self._columns, list) or len(self._columns) <= 1:
             raise ValueError("ChainedRelation constraints must be defined across more than one column")
 
+    def _toInitializationDict(self):
+        """ Describe this constraint as a dictionary of constructor arguments.
+
+            The `kind` entry holds the class name; `relation` and `columns` carry the values
+            held by this instance. Entries whose value is None are left out, and any value that
+            is itself a `SerializableToDict` is replaced by its own initialization dictionary.
+
+            :return: Python dictionary representation of the object
+        """
+        fields = (
+            ("kind", self.__class__.__name__),
+            ("relation", self._relation),
+            ("columns", self._columns),
+        )
+        initArgs = {}
+        for argName, argValue in fields:
+            if argValue is None:
+                continue  # unset arguments are omitted rather than emitted as None
+            if isinstance(argValue, SerializableToDict):
+                argValue = argValue._toInitializationDict()
+            initArgs[argName] = argValue
+        return initArgs
+
     def _generateFilterExpression(self):
         """ Generated composite filter expression for chained set of filter expressions
 
 
@@ -8,9 +8,10 @@
 import types
 from abc import ABC, abstractmethod
 from pyspark.sql import Column
+from ..serialization import SerializableToDict
 
 
-class Constraint(ABC):
+class Constraint(SerializableToDict, ABC):
     """ Constraint object - base class for predefined and custom constraints
 
     This class is meant for internal use only.
 
@@ -8,6 +8,7 @@
 import pyspark.sql.functions as F
 
 from .constraint import Constraint, NoPrepareTransformMixin
+from ..serialization import SerializableToDict
 
 
 class LiteralRange(NoPrepareTransformMixin, Constraint):
@@ -29,6 +30,24 @@ def __init__(self, columns, lowValue, highValue, strict=False):
         self._highValue = highValue
         self._strict = strict
 
+    def _toInitializationDict(self):
+        """ Describe this constraint as a dictionary of constructor arguments.
+
+            The `kind` entry holds the class name; `columns`, `lowValue`, `highValue` and `strict`
+            carry the values held by this instance. Entries whose value is None are left out, and
+            any value that is itself a `SerializableToDict` is replaced by its own initialization
+            dictionary.
+
+            :return: Python dictionary representation of the object
+        """
+        fields = (
+            ("kind", self.__class__.__name__),
+            ("columns", self._columns),
+            ("lowValue", self._lowValue),
+            ("highValue", self._highValue),
+            ("strict", self._strict),
+        )
+        initArgs = {}
+        for argName, argValue in fields:
+            if argValue is None:
+                continue  # unset arguments are omitted rather than emitted as None
+            if isinstance(argValue, SerializableToDict):
+                argValue = argValue._toInitializationDict()
+            initArgs[argName] = argValue
+        return initArgs
+
     def _generateFilterExpression(self):
         """ Generate a SQL filter expression that may be used for filtering"""
         expressions = [F.col(colname) for colname in self._columns]
 
@@ -8,6 +8,7 @@
 import pyspark.sql.functions as F
 
 from .constraint import Constraint, NoPrepareTransformMixin
+from ..serialization import SerializableToDict
 
 
 class LiteralRelation(NoPrepareTransformMixin, Constraint):
@@ -29,6 +30,23 @@ def __init__(self, columns, relation, value):
         if relation not in self.SUPPORTED_OPERATORS:
             raise ValueError(f"Parameter `relation` should be one of the operators :{self.SUPPORTED_OPERATORS}")
 
+    def _toInitializationDict(self):
+        """ Describe this constraint as a dictionary of constructor arguments.
+
+            The `kind` entry holds the class name; `columns`, `relation` and `value` carry the
+            values held by this instance. Entries whose value is None are left out, and any value
+            that is itself a `SerializableToDict` is replaced by its own initialization dictionary.
+
+            :return: Python dictionary representation of the object
+        """
+        fields = (
+            ("kind", self.__class__.__name__),
+            ("columns", self._columns),
+            ("relation", self._relation),
+            ("value", self._value),
+        )
+        initArgs = {}
+        for argName, argValue in fields:
+            if argValue is None:
+                continue  # unset arguments are omitted rather than emitted as None
+            if isinstance(argValue, SerializableToDict):
+                argValue = argValue._toInitializationDict()
+            initArgs[argName] = argValue
+        return initArgs
+
     def _generateFilterExpression(self):
         expressions = [F.col(colname) for colname in self._columns]
         literalValue = F.lit(self._value)
 
@@ -7,6 +7,7 @@
 """
 import pyspark.sql.functions as F
 from .constraint import Constraint, NoPrepareTransformMixin
+from ..serialization import SerializableToDict
 
 
 class NegativeValues(NoPrepareTransformMixin, Constraint):
@@ -27,6 +28,18 @@ def __init__(self, columns, strict=False):
         self._columns = self._columnsFromListOrString(columns)
         self._strict = strict
 
+    def _toInitializationDict(self):
+        """ Describe this constraint as a dictionary of constructor arguments.
+
+            The `kind` entry holds the class name; `columns` and `strict` carry the values held
+            by this instance. Entries whose value is None are left out, and any value that is
+            itself a `SerializableToDict` is replaced by its own initialization dictionary.
+
+            :return: Python dictionary representation of the object
+        """
+        fields = (
+            ("kind", self.__class__.__name__),
+            ("columns", self._columns),
+            ("strict", self._strict),
+        )
+        initArgs = {}
+        for argName, argValue in fields:
+            if argValue is None:
+                continue  # unset arguments are omitted rather than emitted as None
+            if isinstance(argValue, SerializableToDict):
+                argValue = argValue._toInitializationDict()
+            initArgs[argName] = argValue
+        return initArgs
+
     def _generateFilterExpression(self):
         expressions = [F.col(colname) for colname in self._columns]
         if self._strict:
 
@@ -7,6 +7,7 @@
 """
 import pyspark.sql.functions as F
 from .constraint import Constraint, NoPrepareTransformMixin
+from ..serialization import SerializableToDict
 
 
 class PositiveValues(NoPrepareTransformMixin, Constraint):
@@ -27,6 +28,18 @@ def __init__(self, columns, strict=False):
         self._columns = self._columnsFromListOrString(columns)
         self._strict = strict
 
+    def _toInitializationDict(self):
+        """ Describe this constraint as a dictionary of constructor arguments.
+
+            The `kind` entry holds the class name; `columns` and `strict` carry the values held
+            by this instance. Entries whose value is None are left out, and any value that is
+            itself a `SerializableToDict` is replaced by its own initialization dictionary.
+
+            :return: Python dictionary representation of the object
+        """
+        fields = (
+            ("kind", self.__class__.__name__),
+            ("columns", self._columns),
+            ("strict", self._strict),
+        )
+        initArgs = {}
+        for argName, argValue in fields:
+            if argValue is None:
+                continue  # unset arguments are omitted rather than emitted as None
+            if isinstance(argValue, SerializableToDict):
+                argValue = argValue._toInitializationDict()
+            initArgs[argName] = argValue
+        return initArgs
+
     def _generateFilterExpression(self):
         """ Generate a filter expression that may be used for filtering"""
         expressions = [F.col(colname) for colname in self._columns]
 
@@ -8,6 +8,7 @@
 import pyspark.sql.functions as F
 
 from .constraint import Constraint, NoPrepareTransformMixin
+from ..serialization import SerializableToDict
 
 
 class RangedValues(NoPrepareTransformMixin, Constraint):
@@ -28,6 +29,24 @@ def __init__(self, columns, lowValue, highValue, strict=False):
         self._highValue = highValue
         self._strict = strict
 
+    def _toInitializationDict(self):
+        """ Describe this constraint as a dictionary of constructor arguments.
+
+            The `kind` entry holds the class name; `columns`, `lowValue`, `highValue` and `strict`
+            carry the values held by this instance. Entries whose value is None are left out, and
+            any value that is itself a `SerializableToDict` is replaced by its own initialization
+            dictionary.
+
+            :return: Python dictionary mapping constructor options to the object properties
+        """
+        fields = (
+            ("kind", self.__class__.__name__),
+            ("columns", self._columns),
+            ("lowValue", self._lowValue),
+            ("highValue", self._highValue),
+            ("strict", self._strict),
+        )
+        initArgs = {}
+        for argName, argValue in fields:
+            if argValue is None:
+                continue  # unset arguments are omitted rather than emitted as None
+            if isinstance(argValue, SerializableToDict):
+                argValue = argValue._toInitializationDict()
+            initArgs[argName] = argValue
+        return initArgs
+
     def _generateFilterExpression(self):
         """ Generate a SQL filter expression that may be used for filtering"""
         expressions = [F.col(colname) for colname in self._columns]
 
@@ -8,6 +8,7 @@
 import pyspark.sql.functions as F
 
 from .constraint import Constraint, NoPrepareTransformMixin
+from ..serialization import SerializableToDict
 
 
 class SqlExpr(NoPrepareTransformMixin, Constraint):
@@ -25,6 +26,18 @@ def __init__(self, expr: str):
         assert isinstance(expr, str) and len(expr.strip()) > 0, "Expression must be a valid SQL string"
         self._expr = expr
 
+    def _toInitializationDict(self):
+        """ Describe this constraint as a dictionary of constructor arguments.
+
+            The `kind` entry holds the class name and `expr` carries the SQL expression string
+            held by this instance. Entries whose value is None are left out, and any value that
+            is itself a `SerializableToDict` is replaced by its own initialization dictionary.
+
+            :return: Python dictionary representation of the object
+        """
+        fields = (
+            ("kind", self.__class__.__name__),
+            ("expr", self._expr),
+        )
+        initArgs = {}
+        for argName, argValue in fields:
+            if argValue is None:
+                continue  # unset arguments are omitted rather than emitted as None
+            if isinstance(argValue, SerializableToDict):
+                argValue = argValue._toInitializationDict()
+            initArgs[argName] = argValue
+        return initArgs
+
     def _generateFilterExpression(self):
         """ Generate a SQL filter expression that may be used for filtering"""
         return F.expr(self._expr)
@@ -6,6 +6,7 @@
 This module defines the UniqueCombinations class
 """
 from .constraint import Constraint, NoFilterMixin
+from ..serialization import SerializableToDict
 
 
 class UniqueCombinations(NoFilterMixin, Constraint):
@@ -45,6 +46,18 @@ def __init__(self, columns=None):
         else:
             self._columns = None
 
+    def _toInitializationDict(self):
+        """ Describe this constraint as a dictionary of constructor arguments.
+
+            The `kind` entry holds the class name and `columns` carries the column list held by
+            this instance. Entries whose value is None are left out, so `columns` is absent when
+            the constraint was created without columns. Any value that is itself a
+            `SerializableToDict` is replaced by its own initialization dictionary.
+
+            :return: Python dictionary representation of the object
+        """
+        fields = (
+            ("kind", self.__class__.__name__),
+            ("columns", self._columns),
+        )
+        initArgs = {}
+        for argName, argValue in fields:
+            if argValue is None:
+                continue  # unset arguments are omitted rather than emitted as None
+            if isinstance(argValue, SerializableToDict):
+                argValue = argValue._toInitializationDict()
+            initArgs[argName] = argValue
+        return initArgs
+
     def prepareDataGenerator(self, dataGenerator):
         """ Prepare the data generator to generate data that matches the constraint