@@ -9,92 +9,115 @@ import numpy as np
9
9
from fastspecfit .logger import log
10
10
11
11
12
- def run_fastspecfit (args , comm = None , fastphot = False , specprod_dir = None ,
13
- makeqa = False , samplefile = None , input_redshifts = False ,
14
- outdir_data = '.' , templates = None , templateversion = None ,
15
- fphotodir = None , fphotofile = None ):
12
def get_size(comm, mp=1):
    """Return the number of subcommunicators that `comm` is divided into.

    The ranks of `comm` are grouped into consecutive blocks of `mp` ranks
    (one "color" per block); the returned value is the number of such
    blocks, i.e. ``ceil(comm.size / mp)``. This is also the number of ranks
    that end up as rank 0 of their own subcommunicator.

    Parameters
    ----------
    comm : communicator-like or None
        Any object exposing an integer ``size`` attribute. ``None`` means
        "no MPI" and is treated as a single-rank run.
    mp : int, optional
        Number of ranks per subcommunicator. Defaults to 1.

    Returns
    -------
    int
        The number of subcommunicators (always 1 when `comm` is None).

    Raises
    ------
    ZeroDivisionError
        If `mp` is zero.

    """
    # Without a communicator there is exactly one (serial) worker; callers
    # otherwise have to special-case this themselves.
    if comm is None:
        return 1

    # Ceiling division in integer arithmetic: -(-a // b) == ceil(a / b).
    # This avoids the float round-trip through np.ceil and yields a plain
    # Python int.
    return int(-(-comm.size // mp))
17
+
18
+
19
+ def run_fastspecfit (args , comm = None , fastphot = False , specprod_dir = None , makeqa = False ,
20
+ sample = None , input_redshifts = False , outdir_data = '.' , templates = None ,
21
+ templateversion = None , fphotodir = None , fphotofile = None ):
16
22
17
23
import sys
18
24
from desispec .parallel import stdouterr_redirected
19
25
from fastspecfit .mpi import plan
20
26
21
- if comm is None :
22
- rank , size = 0 , 1
27
+ if comm :
28
+ rank = comm .rank
29
+ #size = comm.size
30
+ size = get_size (comm , mp = args .mp )
31
+
32
+ # Split the MPI.COMM_WORLD communicator into size // args.mp
33
+ # subcommunicators so we can parallelize over objects in
34
+ # fastspecfit.fastspec (or fastspecfit.fastphot).
35
+ colors = np .arange (size ) // args .mp
36
+ subcomm = comm .Split (color = rank // args .mp , key = rank )
23
37
else :
24
- rank , size = comm .rank , comm .size
38
+ rank = 0
39
+ size = 1
40
+ colors = [0 ]
41
+ subcomm = None
42
+ print (comm .rank , comm .size , subcomm .rank , subcomm .size , size )
25
43
26
44
t0 = time .time ()
27
45
if rank == 0 :
28
- if args .samplefile is not None :
29
- import fitsio
30
- from astropy .table import Table
31
- if not os .path .isfile (args .samplefile ):
32
- log .warning (f'{ args .samplefile } does not exist.' )
33
- return
34
- try :
35
- readcols = ['SURVEY' , 'PROGRAM' , 'HEALPIX' , 'TARGETID' ]
36
- if input_redshifts :
37
- readcols += ['Z' ]
38
- sample = Table (fitsio .read (args .samplefile , columns = readcols ))
39
- except :
40
- if input_redshifts :
41
- errmsg = f'Sample file { args .samplefile } with --input-redshifts set missing required columns { SURVEY ,PROGRAM ,HEALPIX ,TARGETID ,Z } '
42
- else :
43
- errmsg = f'Sample file { args .samplefile } missing required columns { SURVEY ,PROGRAM ,HEALPIX ,TARGETID } '
44
- log .critical (errmsg )
45
- raise ValueError (errmsg )
46
-
47
- _ , zbestfiles , outfiles , groups , ntargets = plan (
46
+ if sample is not None :
47
+ _ , redrockfiles , outfiles , groups , ntargets = plan (
48
48
size = size , specprod = args .specprod , specprod_dir = specprod_dir ,
49
49
sample = sample , coadd_type = 'healpix' , makeqa = args .makeqa ,
50
- mp = args .mp , fastphot = args .fastphot ,
51
- outdir_data = outdir_data , overwrite = args .overwrite )
50
+ mp = args .mp , fastphot = args .fastphot , outdir_data = outdir_data ,
51
+ overwrite = args .overwrite )
52
52
else :
53
- sample = None
54
- _ , zbestfiles , outfiles , groups , ntargets = plan (
53
+ _ , redrockfiles , outfiles , groups , ntargets = plan (
55
54
size = size , specprod = args .specprod , specprod_dir = specprod_dir ,
56
55
coadd_type = args .coadd_type , survey = args .survey , program = args .program ,
57
56
healpix = args .healpix , tile = args .tile , night = args .night ,
58
57
makeqa = args .makeqa , mp = args .mp , fastphot = fastphot , outdir_data = outdir_data ,
59
58
overwrite = args .overwrite )
60
- log .info ('Planning took {:.2f} sec' . format ( time .time () - t0 ) )
59
+ log .info (f 'Planning took { time .time () - t0 :.2f } sec' )
61
60
else :
62
- sample = None
63
- zbestfiles , outfiles , groups , ntargets = [], [], [], []
64
-
65
- if comm :
66
- zbestfiles = comm .bcast (zbestfiles , root = 0 )
67
- outfiles = comm .bcast (outfiles , root = 0 )
68
- groups = comm .bcast (groups , root = 0 )
69
- ntargets = comm .bcast (ntargets , root = 0 )
70
- sample = comm .bcast (sample , root = 0 )
61
+ redrockfiles , outfiles , groups , ntargets = [], [], [], []
62
+
63
+ #if comm:
64
+ # groups = comm.bcast(groups, root=0)
65
+ # redrockfiles = comm.bcast(redrockfiles, root=0)
66
+ # outfiles = comm.bcast(outfiles, root=0)
67
+ # ntargets = comm.bcast(ntargets, root=0)
68
+ # sample = comm.bcast(sample, root=0)
69
+
70
+ print ('Size! ' , size )
71
+
72
+ # Make sure all the ranks in subcomm have the same work.
73
+ if subcomm :
74
+ if subcomm .rank == 0 :
75
+ for subrank in range (subcomm .size ):
76
+ subcomm .send (groups [rank ], dest = subrank )
77
+ subcomm .send (redrockfiles [groups [rank ]], dest = subrank )
78
+ subcomm .send (outfiles [groups [rank ]], dest = subrank )
79
+ subcomm .send (ntargets [groups [rank ]], dest = subrank )
80
+ else :
81
+ groups [rank ] = subcomm .recv (source = 0 )
82
+ redrockfiles [groups [rank ]] = subcomm .recv (source = 0 )
83
+ outfiles [groups [rank ]] = subcomm .recv (source = 0 )
84
+ ntargets [groups [rank ]] = subcomm .recv (source = 0 )
71
85
72
86
sys .stdout .flush ()
73
87
74
88
# all done
75
- if len (zbestfiles ) == 0 :
89
+ if len (redrockfiles ) == 0 :
76
90
return
77
91
78
- assert (len (groups ) == size )
79
- assert (len (np .concatenate (groups )) == len (zbestfiles ))
92
+ #assert(len(groups) == size)
93
+ #assert(len(np.concatenate(groups)) == len(redrockfiles))
94
+
95
+ """
96
+ 16 redrockfiles
97
+ size = 8
98
+ mp = 4
99
+ colors = np.arange(size) // mp --> [0, 0, 0, 0, 1, 1, 1, 1]
100
+ nsubcomm = int(np.ceil(size / mp)) --> 2
101
+ """
102
+ print (groups , rank , groups [rank ])
80
103
81
104
for ii in groups [rank ]:
82
105
log .debug (f'Rank { rank } started at { time .asctime ()} ' )
83
106
sys .stdout .flush ()
84
107
85
- # With --makeqa the desired output directories are in the 'zbestfiles '.
108
+ # With --makeqa the desired output directories are in the 'redrockfiles '.
86
109
if args .makeqa :
87
110
from fastspecfit .qa import fastqa as fast
88
111
cmd = 'fastqa'
89
- cmdargs = f'{ outfiles [ii ]} -o={ zbestfiles [ii ]} --mp={ args .mp } '
112
+ cmdargs = f'{ outfiles [ii ]} -o={ redrockfiles [ii ]} --mp={ args .mp } '
90
113
else :
91
114
if fastphot :
92
115
from fastspecfit .fastspecfit import fastphot as fast
93
116
cmd = 'fastphot'
94
117
else :
95
118
from fastspecfit .fastspecfit import fastspec as fast
96
119
cmd = 'fastspec'
97
- cmdargs = f'{ zbestfiles [ii ]} -o={ outfiles [ii ]} --mp={ args .mp } '
120
+ cmdargs = f'{ redrockfiles [ii ]} -o={ outfiles [ii ]} --mp={ args .mp } '
98
121
99
122
if args .ignore_quasarnet :
100
123
cmdargs += ' --ignore-quasarnet'
@@ -128,7 +151,7 @@ def run_fastspecfit(args, comm=None, fastphot=False, specprod_dir=None,
128
151
129
152
if sample is not None :
130
153
# assume healpix coadds; find the targetids to process
131
- _ , survey , program , healpix = os .path .basename (zbestfiles [ii ]).split ('-' )
154
+ _ , survey , program , healpix = os .path .basename (redrockfiles [ii ]).split ('-' )
132
155
healpix = int (healpix .split ('.' )[0 ])
133
156
I = (sample ['SURVEY' ] == survey ) * (sample ['PROGRAM' ] == program ) * (sample ['HEALPIX' ] == healpix )
134
157
targetids = ',' .join (sample [I ]['TARGETID' ].astype (str ))
@@ -141,7 +164,7 @@ def run_fastspecfit(args, comm=None, fastphot=False, specprod_dir=None,
141
164
cmdargs += f' --targetids={ args .targetids } '
142
165
143
166
if args .makeqa :
144
- logfile = os .path .join (zbestfiles [ii ], os .path .basename (outfiles [ii ]).replace ('.gz' , '' ).replace ('.fits' , '.log' ))
167
+ logfile = os .path .join (redrockfiles [ii ], os .path .basename (outfiles [ii ]).replace ('.gz' , '' ).replace ('.fits' , '.log' ))
145
168
else :
146
169
logfile = outfiles [ii ].replace ('.gz' , '' ).replace ('.fits' , '.log' )
147
170
@@ -166,17 +189,11 @@ def run_fastspecfit(args, comm=None, fastphot=False, specprod_dir=None,
166
189
if not os .path .isdir (outdir ):
167
190
os .makedirs (outdir , exist_ok = True )
168
191
169
- ## pure-MPI
170
- #if comm is not None:
171
- # subcomm = comm.Split(color=rank)
172
- #else:
173
- # subcomm = None
174
-
175
192
if args .nolog :
176
- fast (args = cmdargs .split ()) # , comm=subcomm)
193
+ fast (args = cmdargs .split (), comm = subcomm )
177
194
else :
178
195
with stdouterr_redirected (to = logfile , overwrite = args .overwrite ):
179
- fast (args = cmdargs .split ()) # , comm=subcomm)
196
+ fast (args = cmdargs .split (), comm = subcomm )
180
197
181
198
dt1 = time .time () - t1
182
199
log .info (f' rank { rank } done in { dt1 :.2f} sec' )
@@ -191,7 +208,7 @@ def run_fastspecfit(args, comm=None, fastphot=False, specprod_dir=None,
191
208
log .debug (f' rank { rank } is done' )
192
209
sys .stdout .flush ()
193
210
194
- if comm is not None :
211
+ if comm :
195
212
comm .barrier ()
196
213
197
214
if rank == 0 and not args .dry_run :
@@ -273,20 +290,21 @@ def main():
273
290
except ImportError :
274
291
comm = None
275
292
276
- if comm is None :
293
+ if comm :
294
+ rank = comm .rank
295
+ if comm .size > 1 and args .mp > 1 and comm .size < args .mp :
296
+ log .warning (f'Number of MPI tasks { comm .size } should be >{ args .mp } for MPI parallelism.' )
297
+ size = get_size (comm , mp = args .mp )
298
+ else :
277
299
rank , size = 0 , 1
278
300
279
301
# https://docs.nersc.gov/development/languages/python/parallel-python/#use-the-spawn-start-method
280
302
if args .mp > 1 and 'NERSC_HOST' in os .environ :
281
303
import multiprocessing
282
304
multiprocessing .set_start_method ('spawn' )
283
- else :
284
- rank , size = comm .rank , comm .size
285
305
286
- # Main rank is responsible for planning and merging.
306
+ # Rank 0 is responsible for planning and merging.
287
307
if rank == 0 :
288
- #from fastspecfit.logger import log
289
-
290
308
# check the input samplefile
291
309
if args .samplefile is not None :
292
310
import fitsio
@@ -296,8 +314,14 @@ def main():
296
314
return
297
315
try :
298
316
sample = Table (fitsio .read (args .samplefile , columns = ['SURVEY' , 'PROGRAM' , 'HEALPIX' , 'TARGETID' ]))
317
+ log .info (f'Read { len (sample )} rows from { args .samplefile } ' )
299
318
except :
300
- errmsg = f'Sample file { args .samplefile } missing required columns { SURVEY ,PROGRAM ,HEALPIX ,TARGETID } '
319
+ if args .input_redshifts :
320
+ errmsg = f'Sample file { args .samplefile } with --input-redshifts missing required columns ' + \
321
+ '{SURVEY,PROGRAM,HEALPIX,TARGETID,Z}'
322
+ else :
323
+ errmsg = f'Sample file { args .samplefile } missing required columns ' + \
324
+ '{SURVEY,PROGRAM,HEALPIX,TARGETID}'
301
325
log .critical (errmsg )
302
326
raise ValueError (errmsg )
303
327
@@ -376,10 +400,11 @@ def main():
376
400
outdir_data = outdir_data , overwrite = args .overwrite )
377
401
else :
378
402
run_fastspecfit (args , comm = comm , fastphot = args .fastphot , specprod_dir = specprod_dir ,
379
- makeqa = args .makeqa , outdir_data = outdir_data ,
380
- samplefile = args .samplefile , input_redshifts = args .input_redshifts ,
381
- templates = args .templates , templateversion = args .templateversion ,
382
- fphotodir = args .fphotodir , fphotofile = args .fphotofile )
403
+ makeqa = args .makeqa , outdir_data = outdir_data , sample = sample ,
404
+ input_redshifts = args .input_redshifts , templates = args .templates ,
405
+ templateversion = args .templateversion , fphotodir = args .fphotodir ,
406
+ fphotofile = args .fphotofile )
407
+
383
408
384
409
if __name__ == '__main__' :
385
410
main ()
0 commit comments