56
56
logger = logging .getLogger (__name__ )
57
57
58
58
59
- def checkSuccessorReadyToRunMultiplePredecessors (successor , predecessor , jobStore , toilState ):
59
+ def checkSuccessorReadyToRunMultiplePredecessors (
60
+ successor : JobDescription ,
61
+ predecessor : JobDescription ,
62
+ jobStore : AbstractJobStore ,
63
+ toilState : ToilState ,
64
+ ) -> bool :
60
65
"""
61
66
Handle the special cases of checking if a successor job is
62
67
ready to run when there are multiple predecessors.
63
68
64
- :param toil.job.JobDescription successor: The successor which has failed.
65
- :param toil.job.JobDescription predecessor: The job which the successor comes after.
69
+ :param successor: The successor which has failed.
70
+ :param predecessor: The job which the successor comes after.
66
71
67
72
"""
68
73
# See implementation note at the top of this file for discussion of multiple predecessors
69
- logger .debug ("Successor job: %s of job: %s has multiple "
70
- "predecessors" , successor , predecessor )
71
- logger .debug ("Already finished predecessors are: %s" , successor .predecessorsFinished )
74
+ logger .debug (
75
+ "Successor job: %s of job: %s has multiple " "predecessors" ,
76
+ successor ,
77
+ predecessor ,
78
+ )
79
+ logger .debug (
80
+ "Already finished predecessors are: %s" , successor .predecessorsFinished
81
+ )
72
82
73
83
# Get the successor JobDescription, which is cached
74
84
if successor .jobStoreID not in toilState .jobsToBeScheduledWithMultiplePredecessors :
75
85
# TODO: We're loading from the job store in an ad-hoc way!
76
86
loaded = jobStore .load (successor .jobStoreID )
77
- toilState .jobsToBeScheduledWithMultiplePredecessors [successor .jobStoreID ] = loaded
87
+ toilState .jobsToBeScheduledWithMultiplePredecessors [
88
+ successor .jobStoreID
89
+ ] = loaded
78
90
# TODO: we're clobbering a JobDescription we're passing around by value.
79
- successor = toilState .jobsToBeScheduledWithMultiplePredecessors [successor .jobStoreID ]
91
+ successor = toilState .jobsToBeScheduledWithMultiplePredecessors [
92
+ successor .jobStoreID
93
+ ]
80
94
81
- logger .debug ("Already finished predecessors are (2) : %s" , successor .predecessorsFinished )
95
+ logger .debug (
96
+ "Already finished predecessors are (2) : %s" , successor .predecessorsFinished
97
+ )
82
98
83
99
# Add the predecessor as a finished predecessor to the successor
84
100
successor .predecessorsFinished .add (predecessor .jobStoreID )
85
101
86
-
87
- logger .debug ("Already finished predecessors are (3) : %s" , successor .predecessorsFinished )
102
+ logger .debug (
103
+ "Already finished predecessors are (3) : %s" , successor .predecessorsFinished
104
+ )
88
105
89
106
# If the successor job's predecessors have not all completed then
90
107
# ignore the successor as it is not yet ready to run
@@ -97,33 +114,43 @@ def checkSuccessorReadyToRunMultiplePredecessors(successor, predecessor, jobStor
97
114
return True
98
115
99
116
100
-
101
- def nextChainable (predecessor : JobDescription , jobStore : AbstractJobStore , toilState , config : Config ) -> Optional [JobDescription ]:
117
+ def nextChainable (
118
+ predecessor : JobDescription ,
119
+ jobStore : AbstractJobStore ,
120
+ toilState : ToilState ,
121
+ config : Config ,
122
+ ) -> Optional [JobDescription ]:
102
123
"""
103
124
Returns the next chainable job's JobDescription after the given predecessor
104
125
JobDescription, if one exists, or None if the chain must terminate.
105
126
106
127
:param predecessor: The job to chain from
107
128
:param jobStore: The JobStore to fetch JobDescriptions from.
108
129
:param config: The configuration for the current run.
109
- :param toil.toilState.ToilState toilState: A local toilState, for providing a mutatable stack
130
+ :param toil.toilState.ToilState toilState: A local toilState, for providing a mutable stack
110
131
"""
111
132
#If no more jobs to run or services not finished, quit
112
133
if len (predecessor .stack ) == 0 or len (predecessor .services ) > 0 or (isinstance (predecessor , CheckpointJobDescription ) and predecessor .checkpoint != None ):
113
134
logger .debug ("Stopping running chain of jobs: length of stack: %s, services: %s, checkpoint: %s" ,
114
135
len (predecessor .stack ), len (predecessor .services ), (isinstance (predecessor , CheckpointJobDescription ) and predecessor .checkpoint != None ))
115
136
return None
116
137
117
- # logger.debug("Length of stack: %s",len(predecessor.stack))
118
- # logger.debug("Number of : %s",len(predecessor.stack))
119
- if len (predecessor .stack ) > 1 and len (predecessor .stack [- 1 ]) > 0 and len (predecessor .stack [- 2 ]) > 0 :
138
+ # logger.debug("Length of stack: %s",len(predecessor.stack))
139
+ # logger.debug("Number of : %s",len(predecessor.stack))
140
+ if (
141
+ len (predecessor .stack ) > 1
142
+ and len (predecessor .stack [- 1 ]) > 0
143
+ and len (predecessor .stack [- 2 ]) > 0
144
+ ):
120
145
# TODO: Without a real stack list we can freely mutate, we can't chain
121
146
# to a child, which may branch, and then go back and do the follow-ons
122
147
# of the original job.
123
148
# TODO: Go back to a free-form stack list and require some kind of
124
149
# stack build phase?
125
- #logger.debug("Job has both children and follow-ons - let's see if this breaks")
126
- logger .debug ("Stopping running chain of jobs because job has both children and follow-ons" )
150
+ # logger.debug("Job has both children and follow-ons - let's see if this breaks")
151
+ logger .debug (
152
+ "Stopping running chain of jobs because job has both children and follow-ons"
153
+ )
127
154
return None
128
155
129
156
#Get the next set of jobs to run
@@ -135,17 +162,17 @@ def nextChainable(predecessor: JobDescription, jobStore: AbstractJobStore, toilS
135
162
136
163
#If there are 2 or more jobs to run in parallel we quit
137
164
if len (jobs ) >= 2 :
138
- logger .debug ("No more jobs can run in series by this worker,"
139
- " it's got %i children" , len (jobs ))
165
+ logger .debug (
166
+ "No more jobs can run in series by this worker," " it's got %i children" ,
167
+ len (jobs ),
168
+ )
140
169
return None
141
170
142
171
# Grab the only job that should be there.
143
172
successorID = next (iter (jobs ))
144
173
145
174
# Load the successor JobDescription
146
175
successor = jobStore .load (successorID )
147
-
148
- #testresult = checkSuccessorReadyToRunMultiplePredecessors(successor, predecessor, jobStore, toilState)
149
176
150
177
#We check the requirements of the successor to see if we can run it
151
178
#within the current worker
@@ -161,14 +188,19 @@ def nextChainable(predecessor: JobDescription, jobStore: AbstractJobStore, toilS
161
188
if successor .preemptable != predecessor .preemptable :
162
189
logger .debug ("Preemptability is different for the next job, returning to the leader" )
163
190
return None
164
- # if (successor.predecessorNumber - len(successor.predecessorsFinished)) > 1:
165
- if not checkSuccessorReadyToRunMultiplePredecessors (successor , predecessor , jobStore , toilState ):
166
- logger .debug ("The next job has %i predecessors that are not yet "
167
- "recorded as finished; we must return to the leader." , successor .predecessorNumber )
191
+ # if (successor.predecessorNumber - len(successor.predecessorsFinished)) > 1:
192
+ if not checkSuccessorReadyToRunMultiplePredecessors (
193
+ successor , predecessor , jobStore , toilState
194
+ ):
195
+ logger .debug (
196
+ "The next job has %i predecessors that are not yet "
197
+ "recorded as finished; we must return to the leader." ,
198
+ successor .predecessorNumber ,
199
+ )
168
200
logger .debug (successor .predecessorsFinished )
169
201
return None
170
202
else :
171
- logger .debug (' all predecessors are finished, we can chain to the successor' )
203
+ logger .debug (" all predecessors are finished, we can chain to the successor" )
172
204
173
205
if len (successor .services ) > 0 :
174
206
logger .debug ("The next job requires services that will not yet be started; we must return to the leader." )
@@ -182,7 +214,15 @@ def nextChainable(predecessor: JobDescription, jobStore: AbstractJobStore, toilS
182
214
# Made it through! This job is chainable.
183
215
return successor
184
216
185
- def workerScript (jobStore : AbstractJobStore , config : Config , jobName : str , jobStoreID : str , parentToilState , redirectOutputToLogFile : bool = True ) -> int :
217
+
218
+ def workerScript (
219
+ jobStore : AbstractJobStore ,
220
+ config : Config ,
221
+ jobName : str ,
222
+ jobStoreID : str ,
223
+ parentToilState : Optional [str ],
224
+ redirectOutputToLogFile : bool = True ,
225
+ ) -> int :
186
226
"""
187
227
Worker process script, runs a job.
188
228
@@ -191,9 +231,9 @@ def workerScript(jobStore: AbstractJobStore, config: Config, jobName: str, jobSt
191
231
:param jobName: The "job name" (a user friendly name) of the job to be run
192
232
:param jobStoreID: The job store ID of the job to be run
193
233
194
- :param str parentToilState: Pickle containing the parent toilState
234
+ :param parentToilState: Pickle containing the parent toilState
195
235
196
- :return int: 1 if a job failed, or 0 if all jobs succeeded
236
+ :return 1 if a job failed, or 0 if all jobs succeeded
197
237
"""
198
238
199
239
configure_root_logger ()
@@ -357,7 +397,7 @@ def workerScript(jobStore: AbstractJobStore, config: Config, jobName: str, jobSt
357
397
if parentToilState :
358
398
logger .debug (parentToilState )
359
399
else :
360
- logger .debug (' parentToilState empty' )
400
+ logger .debug (" parentToilState empty" )
361
401
362
402
##########################################
363
403
#Connect to the deferred function system
@@ -424,16 +464,14 @@ def workerScript(jobStore: AbstractJobStore, config: Config, jobName: str, jobSt
424
464
425
465
startTime = time .time ()
426
466
427
-
428
- logger .debug (jobStore )
429
- # Get a snap shot of the current state of the jobs in the jobStore
467
+ logger .debug (jobStore )
468
+ # Get a snapshot of the current state of the jobs in the jobStore
430
469
# - creating a local version of the leader's ToilState
431
470
if parentToilState :
432
- toilState = pickle .loads (base64 .b64decode (parentToilState .encode (' utf-8' )))
471
+ toilState = pickle .loads (base64 .b64decode (parentToilState .encode (" utf-8" )))
433
472
else :
434
473
toilState = ToilState (jobStore , jobDesc , jobCache = None )
435
-
436
-
474
+
437
475
while True :
438
476
##########################################
439
477
#Run the job body, if there is one
@@ -725,9 +763,12 @@ def parse_args(args: List[str]) -> argparse.Namespace:
725
763
that the worker can then run before/after the job on the batch
726
764
system's behalf.""" )
727
765
728
- parser .add_argument ("--toilState" , default = None , type = str ,
729
- help = """Pickled, base64-encoded copy of the Toul leader's toilState.""" )
730
-
766
+ parser .add_argument (
767
+ "--toilState" ,
768
+ default = None ,
769
+ type = str ,
770
+ help = """Pickled, base64-encoded copy of the Toul leader's toilState.""" ,
771
+ )
731
772
732
773
return parser .parse_args (args )
733
774
@@ -779,10 +820,11 @@ def main(argv: Optional[List[str]] = None) -> None:
779
820
jobStore = Toil .resumeJobStore (options .jobStoreLocator )
780
821
config = jobStore .config
781
822
782
-
783
823
with in_contexts (options .context ):
784
824
# Call the worker
785
- exit_code = workerScript (jobStore , config , options .jobName , options .jobStoreID , options .toilState )
825
+ exit_code = workerScript (
826
+ jobStore , config , options .jobName , options .jobStoreID , options .toilState
827
+ )
786
828
787
829
# Exit with its return value
788
830
sys .exit (exit_code )
0 commit comments