@@ -29,7 +29,7 @@ class Analysis(apps: Seq[ApplicationInfo]) {
29
29
30
30
def getDurations (tcs : ArrayBuffer [TaskCase ]): (Long , Long , Long , Double ) = {
31
31
val durations = tcs.map(_.duration)
32
- if (durations.size > 0 ) {
32
+ if (durations.nonEmpty ) {
33
33
(durations.sum, durations.max, durations.min,
34
34
ToolUtils .calculateAverage(durations.sum, durations.size, 1 ))
35
35
} else {
@@ -49,22 +49,20 @@ class Analysis(apps: Seq[ApplicationInfo]) {
49
49
def jobAndStageMetricsAggregation (): Seq [JobStageAggTaskMetricsProfileResult ] = {
50
50
val allJobRows = apps.flatMap { app =>
51
51
app.jobIdToInfo.map { case (id, jc) =>
52
- val stageIdsInJob = jc.stageIds
53
52
val stagesInJob = app.stageIdToInfo.filterKeys { case (sid, _) =>
54
- stageIdsInJob .contains(sid)
55
- }.keys.map(_._1).toSeq
53
+ jc.stageIds .contains(sid)
54
+ }.keys.map(_._1).toSet
56
55
if (stagesInJob.isEmpty) {
57
56
None
58
57
} else {
59
58
val tasksInJob = app.taskEnd.filter { tc =>
60
59
stagesInJob.contains(tc.stageId)
61
60
}
62
61
// note: tasksInJob.size (passed to the result below) counts duplicate task attempts too
63
- val numTaskAttempt = tasksInJob.size
64
62
val (durSum, durMax, durMin, durAvg) = getDurations(tasksInJob)
65
63
Some (JobStageAggTaskMetricsProfileResult (app.index,
66
64
s " job_ $id" ,
67
- numTaskAttempt ,
65
+ tasksInJob.size ,
68
66
jc.duration,
69
67
tasksInJob.map(_.diskBytesSpilled).sum,
70
68
durSum,
@@ -100,9 +98,8 @@ class Analysis(apps: Seq[ApplicationInfo]) {
100
98
}
101
99
val allJobStageRows = apps.flatMap { app =>
102
100
app.jobIdToInfo.flatMap { case (_, jc) =>
103
- val stageIdsInJob = jc.stageIds
104
101
val stagesInJob = app.stageIdToInfo.filterKeys { case (sid, _) =>
105
- stageIdsInJob .contains(sid)
102
+ jc.stageIds .contains(sid)
106
103
}
107
104
if (stagesInJob.isEmpty) {
108
105
None
@@ -111,12 +108,10 @@ class Analysis(apps: Seq[ApplicationInfo]) {
111
108
val tasksInStage = app.taskEnd.filter { tc =>
112
109
tc.stageId == id
113
110
}
114
- // count duplicate task attempts
115
- val numAttempts = tasksInStage.size
116
111
val (durSum, durMax, durMin, durAvg) = getDurations(tasksInStage)
117
112
Some (JobStageAggTaskMetricsProfileResult (app.index,
118
113
s " stage_ $id" ,
119
- numAttempts ,
114
+ tasksInStage.size ,
120
115
sc.duration,
121
116
tasksInStage.map(_.diskBytesSpilled).sum,
122
117
durSum,
@@ -153,17 +148,16 @@ class Analysis(apps: Seq[ApplicationInfo]) {
153
148
}
154
149
// stages that are missing from a job, perhaps dropped events
155
150
val stagesWithoutJobs = apps.flatMap { app =>
156
- val allStageinJobs = app.jobIdToInfo.flatMap { case (_, jc) =>
157
- val stageIdsInJob = jc.stageIds
151
+ val allStageInJobs = app.jobIdToInfo.flatMap { case (_, jc) =>
158
152
app.stageIdToInfo.filterKeys { case (sid, _) =>
159
- stageIdsInJob .contains(sid)
153
+ jc.stageIds .contains(sid)
160
154
}
161
155
}
162
- val missing = app.stageIdToInfo.keys.toSeq .diff(allStageinJobs .keys.toSeq )
156
+ val missing = app.stageIdToInfo.keys.toSet .diff(allStageInJobs .keys.toSet )
163
157
if (missing.isEmpty) {
164
158
Seq .empty
165
159
} else {
166
- missing.map { case (( id, saId) ) =>
160
+ missing.map { case (id, saId) =>
167
161
val scOpt = app.stageIdToInfo.get((id, saId))
168
162
scOpt match {
169
163
case None =>
@@ -214,11 +208,11 @@ class Analysis(apps: Seq[ApplicationInfo]) {
214
208
}
215
209
216
210
val allRows = allJobRows ++ allJobStageRows ++ stagesWithoutJobs
217
- val filteredRows = allRows.filter(_.isDefined).map(_.get )
218
- if (filteredRows.size > 0 ) {
211
+ val filteredRows = allRows.flatMap(row => row )
212
+ if (filteredRows.nonEmpty ) {
219
213
val sortedRows = filteredRows.sortBy { cols =>
220
214
val sortDur = cols.duration.getOrElse(0L )
221
- (cols.appIndex, - ( sortDur) , cols.id)
215
+ (cols.appIndex, - sortDur, cols.id)
222
216
}
223
217
sortedRows
224
218
} else {
@@ -231,12 +225,12 @@ class Analysis(apps: Seq[ApplicationInfo]) {
231
225
val allRows = apps.flatMap { app =>
232
226
app.sqlIdToInfo.map { case (sqlId, sqlCase) =>
233
227
val jcs = app.jobIdToInfo.filter { case (_, jc) =>
234
- jc.sqlID.getOrElse( - 1 ) == sqlId
228
+ jc.sqlID.isDefined && jc.sqlID.get == sqlId
235
229
}
236
230
if (jcs.isEmpty) {
237
231
None
238
232
} else {
239
- val stageIdsForSQL = jcs.flatMap(_._2.stageIds).toSeq
233
+ val stageIdsForSQL = jcs.flatMap(_._2.stageIds).toSet
240
234
val tasksInSQL = app.taskEnd.filter { tc =>
241
235
stageIdsForSQL.contains(tc.stageId)
242
236
}
@@ -298,7 +292,7 @@ class Analysis(apps: Seq[ApplicationInfo]) {
298
292
}
299
293
}
300
294
}
301
- val allFiltered = allRows.filter(_.isDefined).map(_.get )
295
+ val allFiltered = allRows.flatMap(row => row )
302
296
if (allFiltered.size > 0 ) {
303
297
val sortedRows = allFiltered.sortBy { cols =>
304
298
val sortDur = cols.duration.getOrElse(0L )
@@ -314,12 +308,12 @@ class Analysis(apps: Seq[ApplicationInfo]) {
314
308
val allRows = apps.flatMap { app =>
315
309
app.sqlIdToInfo.map { case (sqlId, _) =>
316
310
val jcs = app.jobIdToInfo.filter { case (_, jc) =>
317
- jc.sqlID.getOrElse( - 1 ) == sqlId
311
+ jc.sqlID.isDefined && jc.sqlID.get == sqlId
318
312
}
319
313
if (jcs.isEmpty) {
320
314
None
321
315
} else {
322
- val stageIdsForSQL = jcs.flatMap(_._2.stageIds).toSeq
316
+ val stageIdsForSQL = jcs.flatMap(_._2.stageIds).toSet
323
317
324
318
val tasksInSQL = app.taskEnd.filter { tc =>
325
319
stageIdsForSQL.contains(tc.stageId)
@@ -344,7 +338,7 @@ class Analysis(apps: Seq[ApplicationInfo]) {
344
338
}
345
339
}
346
340
}
347
- val allFiltered = allRows.filter(_.isDefined).map(_.get )
341
+ val allFiltered = allRows.flatMap(row => row )
348
342
if (allFiltered.size > 0 ) {
349
343
val sortedRows = allFiltered.sortBy { cols =>
350
344
(cols.appIndex, cols.sqlId)
@@ -359,12 +353,12 @@ class Analysis(apps: Seq[ApplicationInfo]) {
359
353
apps.map { app =>
360
354
val maxOfSqls = app.sqlIdToInfo.map { case (sqlId, _) =>
361
355
val jcs = app.jobIdToInfo.filter { case (_, jc) =>
362
- jc.sqlID.getOrElse( - 1 ) == sqlId
356
+ jc.sqlID.isDefined && jc.sqlID.get == sqlId
363
357
}
364
358
if (jcs.isEmpty) {
365
359
0L
366
360
} else {
367
- val stageIdsForSQL = jcs.flatMap(_._2.stageIds).toSeq
361
+ val stageIdsForSQL = jcs.flatMap(_._2.stageIds).toSet
368
362
val tasksInSQL = app.taskEnd.filter { tc =>
369
363
stageIdsForSQL.contains(tc.stageId)
370
364
}
@@ -394,7 +388,7 @@ class Analysis(apps: Seq[ApplicationInfo]) {
394
388
sqlCase.sqlCpuTimePercent)
395
389
}
396
390
}
397
- if (allRows.size > 0 ) {
391
+ if (allRows.nonEmpty ) {
398
392
val sortedRows = allRows.sortBy { cols =>
399
393
val sortDur = cols.duration.getOrElse(0L )
400
394
(cols.appIndex, cols.sqlID, sortDur)
@@ -443,8 +437,8 @@ class Analysis(apps: Seq[ApplicationInfo]) {
443
437
}
444
438
}
445
439
446
- val allNonEmptyRows = allRows.filter(_.isDefined).map(_.get )
447
- if (allNonEmptyRows.size > 0 ) {
440
+ val allNonEmptyRows = allRows.flatMap(row => row )
441
+ if (allNonEmptyRows.nonEmpty ) {
448
442
val sortedRows = allNonEmptyRows.sortBy { cols =>
449
443
(cols.appIndex, cols.stageId, cols.stageAttemptId, cols.taskId, cols.taskAttemptId)
450
444
}
0 commit comments