@@ -16,32 +16,42 @@ object DataframeGenerator {
16
16
* @param minPartitions minimum number of partitions, defaults to 1.
17
17
* @return Arbitrary DataFrames generator of the required schema.
18
18
*/
19
- def arbitraryDataFrame (sqlContext : SQLContext , schema : StructType , minPartitions : Int = 1 ): Arbitrary [DataFrame ] = {
19
+ def arbitraryDataFrame (
20
+ sqlContext : SQLContext , schema : StructType , minPartitions : Int = 1 ):
21
+ Arbitrary [DataFrame ] = {
20
22
arbitraryDataFrameWithCustomFields(sqlContext, schema, minPartitions)()
21
23
}
22
24
23
25
/**
24
- * Creates a DataFrame Generator for the given Schema, and the given custom generators.
25
- * custom generators should be in the form of (column index, generator function).
26
+ * Creates a DataFrame Generator for the given Schema, and the given custom
27
+ * generators.
28
+ * Custom generators should be specified as a list of:
29
+ * (column index, generator function) tuples.
26
30
*
27
31
* Note: The given custom generators should match the required schema,
28
32
* for ex. you can't use Int generator for StringType.
29
33
*
30
- * Note 2: The ColumnGenerator* accepted as userGenerators has changed. ColumnGenerator is now the base class of the
31
- * accepted generators, users upgrading to 0.6 need to change their calls to use Column. Futher explanation can be
32
- * found in the release notes, and in the class descriptions at the bottom of this file.
34
+ * Note 2: The ColumnGenerator* accepted as userGenerators has changed.
35
+ * ColumnGenerator is now the base class of the
36
+ * accepted generators, users upgrading to 0.6 need to change their calls
37
+ * to use Column. Further explanation can be found in the release notes, and
38
+ * in the class descriptions at the bottom of this file.
33
39
*
34
40
* @param sqlContext SQL Context.
35
41
* @param schema The required Schema.
36
42
* @param minPartitions minimum number of partitions, defaults to 1.
37
- * @param userGenerators custom user generators in the form of (column index, generator function).
38
- * column index starts from 0 to length - 1
43
+ * @param userGenerators custom user generators in the form of:
44
+ * (column index, generator function).
45
+ * where column index starts from 0 to length - 1
39
46
* @return Arbitrary DataFrames generator of the required schema.
40
47
*/
41
- def arbitraryDataFrameWithCustomFields (sqlContext : SQLContext , schema : StructType , minPartitions : Int = 1 )
42
- (userGenerators : ColumnGenerator * ): Arbitrary [DataFrame ] = {
48
+ def arbitraryDataFrameWithCustomFields (
49
+ sqlContext : SQLContext , schema : StructType , minPartitions : Int = 1 )
50
+ (userGenerators : ColumnGenerator * ): Arbitrary [DataFrame ] = {
43
51
44
- val arbitraryRDDs = RDDGenerator .genRDD(sqlContext.sparkContext, minPartitions)(getRowGenerator(schema, userGenerators))
52
+ val arbitraryRDDs = RDDGenerator .genRDD(
53
+ sqlContext.sparkContext, minPartitions)(
54
+ getRowGenerator(schema, userGenerators))
45
55
Arbitrary {
46
56
arbitraryRDDs.map(sqlContext.createDataFrame(_, schema))
47
57
}
@@ -60,22 +70,31 @@ object DataframeGenerator {
60
70
/**
61
71
* Creates a row generator for the required schema, using the user's custom generators.
62
72
*
63
- * Note: Custom generators should match the required schema, for ex. you can't use Int generator for StringType.
73
+ * Note: Custom generators should match the required schema, for example
74
+ * you can't use Int generator for StringType.
64
75
*
65
76
* @param schema the required Row's schema.
66
- * @param customGenerators user custom generator, this is useful if the user want to
67
- * Control specific columns generation.
77
+ * @param customGenerators user custom generator, this is useful if you want
78
+ * to control specific columns generation.
68
79
* @return Gen[Row]
69
80
*/
70
- def getRowGenerator (schema : StructType , customGenerators : Seq [ColumnGenerator ]): Gen [Row ] = {
71
- val generators : List [Gen [Any ]] = createGenerators(schema.fields, customGenerators)
72
- val listGen : Gen [List [Any ]] = Gen .sequence[List [Any ], Any ](generators)
73
- val generator : Gen [Row ] = listGen.map(list => Row .fromSeq(list))
81
+ def getRowGenerator (
82
+ schema : StructType , customGenerators : Seq [ColumnGenerator ]): Gen [Row ] = {
83
+ val generators : List [Gen [Any ]] =
84
+ createGenerators(schema.fields, customGenerators)
85
+ val listGen : Gen [List [Any ]] =
86
+ Gen .sequence[List [Any ], Any ](generators)
87
+ val generator : Gen [Row ] =
88
+ listGen.map(list => Row .fromSeq(list))
74
89
generator
75
90
}
76
91
77
- private def createGenerators (fields : Array [StructField ], userGenerators : Seq [ColumnGenerator ]): List [Gen [Any ]] = {
78
- val generatorMap = userGenerators.map(generator => (generator.columnName -> generator)).toMap
92
+ private def createGenerators (
93
+ fields : Array [StructField ],
94
+ userGenerators : Seq [ColumnGenerator ]):
95
+ List [Gen [Any ]] = {
96
+ val generatorMap = userGenerators.map(
97
+ generator => (generator.columnName -> generator)).toMap
79
98
(0 until fields.length).toList.map(index => {
80
99
if (generatorMap.contains(fields(index).name)) {
81
100
generatorMap.get(fields(index).name).get match {
@@ -87,7 +106,8 @@ object DataframeGenerator {
87
106
})
88
107
}
89
108
90
- private def getGenerator (dataType : DataType , generators : Seq [ColumnGenerator ] = Seq ()): Gen [Any ] = {
109
+ private def getGenerator (
110
+ dataType : DataType , generators : Seq [ColumnGenerator ] = Seq ()): Gen [Any ] = {
91
111
dataType match {
92
112
case ByteType => Arbitrary .arbitrary[Byte ]
93
113
case ShortType => Arbitrary .arbitrary[Short ]
@@ -102,7 +122,7 @@ object DataframeGenerator {
102
122
case DateType => Arbitrary .arbLong.arbitrary.map(new Date (_))
103
123
case arr : ArrayType => {
104
124
val elementGenerator = getGenerator(arr.elementType)
105
- return Gen .listOf(elementGenerator)
125
+ Gen .listOf(elementGenerator)
106
126
}
107
127
case map : MapType => {
108
128
val keyGenerator = getGenerator(map.keyType)
@@ -112,31 +132,37 @@ object DataframeGenerator {
112
132
value <- valueGenerator
113
133
} yield (key, value)
114
134
115
- return Gen .mapOf(keyValueGenerator)
135
+ Gen .mapOf(keyValueGenerator)
116
136
}
117
- case row : StructType => return getRowGenerator(row, generators)
118
- case _ => throw new UnsupportedOperationException (s " Type: $dataType not supported " )
137
+ case row : StructType => getRowGenerator(row, generators)
138
+ case _ => throw new UnsupportedOperationException (
139
+ s " Type: $dataType not supported " )
119
140
}
120
141
}
121
142
122
143
}
123
144
124
145
/**
125
- * Previously ColumnGenerator. Allows the user to specify a generator for a specific column
146
+ * Previously ColumnGenerator. Allows the user to specify a generator for a
147
+ * specific column.
126
148
*/
127
- class Column (val columnName : String , generator : => Gen [Any ]) extends ColumnGenerator {
149
+ class Column (val columnName : String , generator : => Gen [Any ])
150
+ extends ColumnGenerator {
128
151
lazy val gen = generator
129
152
}
130
153
131
154
/**
132
- * ColumnList allows users to specify custom generators for a list of columns inside a StructType column
155
+ * ColumnList allows users to specify custom generators for a list of
156
+ * columns inside a StructType column.
133
157
*/
134
- class ColumnList (val columnName : String , generators : => Seq [ColumnGenerator ]) extends ColumnGenerator {
158
+ class ColumnList (val columnName : String , generators : => Seq [ColumnGenerator ])
159
+ extends ColumnGenerator {
135
160
lazy val gen = generators
136
161
}
137
162
138
163
/**
139
- * ColumnGenerator - prevously Column; it is now the base class for all ColumnGenerators
164
+ * ColumnGenerator - previously Column; it is now the base class for all
165
+ * ColumnGenerators.
140
166
*/
141
167
abstract class ColumnGenerator extends java.io.Serializable {
142
168
val columnName : String
0 commit comments