@@ -97,14 +97,17 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging {
9797 builder : Operator .Builder ,
9898 childOp : OperatorOuterClass .Operator * ): Option [OperatorOuterClass .Operator ] = {
9999 val nativeScanBuilder = OperatorOuterClass .NativeScan .newBuilder()
100- nativeScanBuilder.setSource(scan.simpleStringWithNodeId())
100+ val commonBuilder = OperatorOuterClass .NativeScanCommon .newBuilder()
101+
102+ // Set source in common (used as part of injection key)
103+ commonBuilder.setSource(scan.simpleStringWithNodeId())
101104
102105 val scanTypes = scan.output.flatten { attr =>
103106 serializeDataType(attr.dataType)
104107 }
105108
106109 if (scanTypes.length == scan.output.length) {
107- nativeScanBuilder .addAllFields(scanTypes.asJava)
110+ commonBuilder .addAllFields(scanTypes.asJava)
108111
109112 // Sink operators don't have children
110113 builder.clearChildren()
@@ -120,7 +123,7 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging {
120123 logWarning(s " Unsupported data filter $filter" )
121124 }
122125 }
123- nativeScanBuilder .addAllDataFilters(dataFilters.asJava)
126+ commonBuilder .addAllDataFilters(dataFilters.asJava)
124127 }
125128
126129 val possibleDefaultValues = getExistenceDefaultValues(scan.requiredSchema)
@@ -136,20 +139,15 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging {
136139 (Literal (expr), index.toLong.asInstanceOf [java.lang.Long ])
137140 }
138141 .unzip
139- nativeScanBuilder .addAllDefaultValues(
142+ commonBuilder .addAllDefaultValues(
140143 defaultValues.flatMap(exprToProto(_, scan.output)).toIterable.asJava)
141- nativeScanBuilder .addAllDefaultValuesIndexes(indexes.toIterable.asJava)
144+ commonBuilder .addAllDefaultValuesIndexes(indexes.toIterable.asJava)
142145 }
143146
147+ // Extract object store options from first file (S3 configs apply to all files in scan)
144148 var firstPartition : Option [PartitionedFile ] = None
145149 val filePartitions = scan.getFilePartitions()
146- val filePartitionsProto = filePartitions.map { partition =>
147- if (firstPartition.isEmpty) {
148- firstPartition = partition.files.headOption
149- }
150- partition2Proto(partition, scan.relation.partitionSchema)
151- }
152- nativeScanBuilder.addAllFilePartitions(filePartitionsProto.asJava)
150+ firstPartition = filePartitions.flatMap(_.files.headOption).headOption
153151
154152 val partitionSchema = schema2Proto(scan.relation.partitionSchema.fields)
155153 val requiredSchema = schema2Proto(scan.requiredSchema.fields)
@@ -166,31 +164,34 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging {
166164 val projectionVector = (dataSchemaIndexes ++ partitionSchemaIndexes).map(idx =>
167165 idx.toLong.asInstanceOf [java.lang.Long ])
168166
169- nativeScanBuilder .addAllProjectionVector(projectionVector.toIterable.asJava)
167+ commonBuilder .addAllProjectionVector(projectionVector.toIterable.asJava)
170168
171169 // In `CometScanRule`, we ensure partitionSchema is supported.
172170 assert(partitionSchema.length == scan.relation.partitionSchema.fields.length)
173171
174- nativeScanBuilder .addAllDataSchema(dataSchema.toIterable.asJava)
175- nativeScanBuilder .addAllRequiredSchema(requiredSchema.toIterable.asJava)
176- nativeScanBuilder .addAllPartitionSchema(partitionSchema.toIterable.asJava)
177- nativeScanBuilder .setSessionTimezone(scan.conf.getConfString(" spark.sql.session.timeZone" ))
178- nativeScanBuilder .setCaseSensitive(scan.conf.getConf[Boolean ](SQLConf .CASE_SENSITIVE ))
172+ commonBuilder .addAllDataSchema(dataSchema.toIterable.asJava)
173+ commonBuilder .addAllRequiredSchema(requiredSchema.toIterable.asJava)
174+ commonBuilder .addAllPartitionSchema(partitionSchema.toIterable.asJava)
175+ commonBuilder .setSessionTimezone(scan.conf.getConfString(" spark.sql.session.timeZone" ))
176+ commonBuilder .setCaseSensitive(scan.conf.getConf[Boolean ](SQLConf .CASE_SENSITIVE ))
179177
180178 // Collect S3/cloud storage configurations
181179 val hadoopConf = scan.relation.sparkSession.sessionState
182180 .newHadoopConfWithOptions(scan.relation.options)
183181
184- nativeScanBuilder .setEncryptionEnabled(CometParquetUtils .encryptionEnabled(hadoopConf))
182+ commonBuilder .setEncryptionEnabled(CometParquetUtils .encryptionEnabled(hadoopConf))
185183
186184 firstPartition.foreach { partitionFile =>
187185 val objectStoreOptions =
188186 NativeConfig .extractObjectStoreOptions(hadoopConf, partitionFile.pathUri)
189187 objectStoreOptions.foreach { case (key, value) =>
190- nativeScanBuilder .putObjectStoreOptions(key, value)
188+ commonBuilder .putObjectStoreOptions(key, value)
191189 }
192190 }
193191
192+ // Set common data in NativeScan (file_partition will be populated at execution time)
193+ nativeScanBuilder.setCommon(commonBuilder.build())
194+
194195 Some (builder.setNativeScan(nativeScanBuilder).build())
195196
196197 } else {
@@ -204,6 +205,6 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging {
204205 }
205206
206207 override def createExec (nativeOp : Operator , op : CometScanExec ): CometNativeExec = {
207- CometNativeScanExec (nativeOp, op.wrapped, op.session)
208+ CometNativeScanExec (nativeOp, op.wrapped, op.session, op )
208209 }
209210}
0 commit comments