@@ -23,6 +23,7 @@ import za.co.absa.cobrix.spark.cobol.source.base.SparkTestBase
23
23
import za .co .absa .cobrix .spark .cobol .source .fixtures .BinaryFileFixture
24
24
25
25
class FixedLengthEbcdicWriterSuite extends AnyWordSpec with SparkTestBase with BinaryFileFixture {
26
+
26
27
import spark .implicits ._
27
28
28
29
private val copybookContents =
@@ -37,7 +38,7 @@ class FixedLengthEbcdicWriterSuite extends AnyWordSpec with SparkTestBase with B
37
38
val df = List ((" A" , " First" ), (" B" , " Scnd" ), (" C" , " Last" )).toDF(" A" , " B" )
38
39
39
40
val path = new Path (tempDir, " writer1" )
40
-
41
+
41
42
df.repartition(1 )
42
43
.orderBy(" A" )
43
44
.write
@@ -74,6 +75,117 @@ class FixedLengthEbcdicWriterSuite extends AnyWordSpec with SparkTestBase with B
74
75
}
75
76
}
76
77
}
78
+
79
// Verifies two writer behaviors at once:
//   1. DataFrame columns given in a different order than the copybook (C, B, A)
//      are mapped back onto the copybook field order when encoding.
//   2. A null string value is written as binary zero fill for the whole field.
"write data frames with different field order and null values" in {
  withTempDirectory("cobol_writer1") { tempDir =>
    // Column order (C, B, A) deliberately differs from the copybook order (A, B);
    // the extra column C is not part of the copybook and should be ignored.
    val df = List((1, "First", "A"), (2, "Scnd", "B"), (3, null, "C")).toDF("C", "B", "A")

    val path = new Path(tempDir, "writer1")

    df.repartition(1)
      .orderBy("A")
      .write
      .format("cobol")
      .mode(SaveMode.Overwrite)
      .option("copybook_contents", copybookContents)
      .save(path.toString)

    val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration)

    assert(fs.exists(path), "Output directory should exist")
    val files = fs.listStatus(path)
      .filter(_.getPath.getName.startsWith("part-"))
    assert(files.nonEmpty, "Output directory should contain part files")

    // Read the single part file back as raw bytes so the exact EBCDIC
    // encoding can be compared.
    val partFile = files.head.getPath
    val data = fs.open(partFile)
    val bytes = new Array[Byte](files.head.getLen.toInt)
    try {
      data.readFully(bytes)
    } finally {
      // Close even if the read fails so the test does not leak the stream.
      data.close()
    }

    // Expected EBCDIC data: field A is 1 byte, field B is 5 bytes zero-padded.
    val expected = Array[Byte](
      0xC1.toByte, 0xC6.toByte, 0x89.toByte, 0x99.toByte, 0xA2.toByte, 0xA3.toByte, // "A", "First"
      0xC2.toByte, 0xE2.toByte, 0x83.toByte, 0x95.toByte, 0x84.toByte, 0x00.toByte, // "B", "Scnd" + 1 byte of zero padding
      0xC3.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte  // "C", null value -> field is all zeros
    )

    // Put the hex dumps into the failure message instead of printing them
    // unconditionally, so diagnostics appear exactly when the test fails.
    assert(bytes.sameElements(expected),
      s"Written data should match expected EBCDIC encoding. " +
        s"Expected bytes: ${expected.map("%02X" format _).mkString(" ")}; " +
        s"Actual bytes: ${bytes.map("%02X" format _).mkString(" ")}")
  }
}
121
+
122
// The cobol writer does not support appending to an existing output path:
// the first Append creates the path, the second one must be rejected.
"write should fail with save mode append and the path exists" in {
  withTempDirectory("cobol_writer3") { tempDir =>
    val df = List(("A", "First"), ("B", "Scnd"), ("C", "Last")).toDF("A", "B")

    val path = new Path(tempDir, "writer2")

    // Writes the frame once with SaveMode.Append to the target path.
    def appendOnce(): Unit = {
      df.write
        .format("cobol")
        .mode(SaveMode.Append)
        .option("copybook_contents", copybookContents)
        .save(path.toString)
    }

    // First write succeeds because the path does not exist yet.
    appendOnce()

    // Second write hits the existing path and must throw.
    assertThrows[IllegalArgumentException] {
      appendOnce()
    }
  }
}
143
+
144
// With SaveMode.ErrorIfExists, writing to a path that already exists
// must be rejected with an IllegalArgumentException.
"write should fail with save mode fail if exists and the path exists" in {
  withTempDirectory("cobol_writer3") { tempDir =>
    val df = List(("A", "First"), ("B", "Scnd"), ("C", "Last")).toDF("A", "B")

    val path = new Path(tempDir, "writer2")

    // Writes the frame once with SaveMode.ErrorIfExists to the target path.
    def writeOnce(): Unit = {
      df.write
        .format("cobol")
        .mode(SaveMode.ErrorIfExists)
        .option("copybook_contents", copybookContents)
        .save(path.toString)
    }

    // First write succeeds because the path does not exist yet.
    writeOnce()

    // Second write targets the now-existing path and must throw.
    assertThrows[IllegalArgumentException] {
      writeOnce()
    }
  }
}
165
+
166
// With SaveMode.Ignore a write to an existing path is silently skipped:
// the second write must neither throw nor remove the existing output.
"write should be ignored when save mode is ignore" in {
  withTempDirectory("cobol_writer3") { tempDir =>
    val df = List(("A", "First"), ("B", "Scnd"), ("C", "Last")).toDF("A", "B")

    val path = new Path(tempDir, "writer2")

    // Writes the frame with SaveMode.Ignore to the target path.
    def writeIgnoring(): Unit = {
      df.write
        .format("cobol")
        .mode(SaveMode.Ignore)
        .option("copybook_contents", copybookContents)
        .save(path.toString)
    }

    // First write creates the output; second is a no-op and must not throw.
    writeIgnoring()
    writeIgnoring()

    val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration)
    assert(fs.exists(path), "Output directory should exist")
  }
}
188
+
77
189
}
78
190
79
191
}
0 commit comments