diff --git a/pyproject.toml b/pyproject.toml index 945715b..955418e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "rda_python_miscs" -version = "2.0.0" +version = "2.0.1" authors = [ { name="Zaihua Ji", email="zji@ucar.edu" }, ] diff --git a/src/rda_python_miscs/rdakill.py b/src/rda_python_miscs/rdakill.py index 4691b72..d6497a1 100644 --- a/src/rda_python_miscs/rdakill.py +++ b/src/rda_python_miscs/rdakill.py @@ -26,7 +26,7 @@ def __init__(self): 'r': 0, # 1 - reserved for exclusive, working with -s PEND only 'u': None, # login user name 's': None, # batch status to kill - 'q': None # batch partition/queue for SLURM/PBS, rda for default + 'q': None # batch partition/queue for PBS, rda for default } # function to read parameters @@ -71,15 +71,7 @@ def start_actions(self): killloc = 1 if self.RDAKILL['h']: self.local_host_action(self.RDAKILL['h'], "kill processes", self.PGLOG['HOSTNAME'], self.LGEREX) - if not self.pgcmp(self.RDAKILL['h'], self.PGLOG['SLMNAME'], 1): - if not (self.RDAKILL['p'] or self.RDAKILL['s']): - self.pglog("Provide Batch ID or Job Status to kill SLURM jobs", self.LGEREX) - if self.RDAKILL['p']: - self.dakill_slurm_batch(self.RDAKILL['p']) - else: - self.rdakill_slurm_status(self.RDAKILL['s'], self.RDAKILL['q'], self.RDAKILL['u']) - killloc = 0 - elif not self.pgcmp(self.RDAKILL['h'], self.PGLOG['PBSNAME'], 1): + if not self.pgcmp(self.RDAKILL['h'], self.PGLOG['PBSNAME'], 1): if not (self.RDAKILL['p'] or self.RDAKILL['s']): self.pglog("Provide Batch ID or Job Status to kill PBS jobs", self.LGEREX) if self.RDAKILL['p']: @@ -127,7 +119,6 @@ def rdakill_processes(self, pid, ppid, aname = None, uname = None, level = 0): buf += "on " + self.RDAKILL['h'] else: buf += "locally" - if self.PGLOG['CURBID']: buf += "; add Option '-h SLURM' if SLURM batch ID provided" self.pglog(buf, self.LOGWRN) # a local child process @@ -139,41 +130,7 @@ def kill_local_child(self, pid, uid, 
line): elif self.check_process(pid): return self.pglog("Error Kill: {}\n{}".format(line, self.PGLOG['SYSERR']), self.LOGWRN) if not self.check_process(pid): self.pglog("Quit: " + line, self.LOGWRN) - - # kill a slurm batch job - def rdakill_slurm_batch(self, bid): - ret = 0 - stat = self.check_slurm_status(bid, self.LOGWRN) - if stat: - cmd = self.get_local_command("scancel {}".format(bid), stat['USER']) - ret = self.pgsystem(cmd, self.LOGWRN, 6) - if ret: self.record_dscheck_interrupt(bid, self.PGLOG['SLMNAME']) - else: - self.pglog("{}: cannot find SLURM batch ID".format(bid), self.LOGERR) - if not ret and self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.LGEREX) - return ret - - # kill SLURM batch jobs for given status - def rdakill_slurm_status(self, stat, part, uname): - if not part: part = 'rda' - bcmd = "sacct -o jobid,user,state -r {} -".format(part) - bcmd += ("u " + uname if uname else 'a') - lines = self.get_slurm_multiple(bcmd) - bcnt = len(lines['JOBID']) if lines else 0 - pcnt = kcnt = 0 - for i in range(bcnt): - if lines['STATE'][i] == stat: - pcnt += 1 - kcnt += self.rdakill_slurm_batch(lines['JOBID'][i]) - if pcnt > 0: - s = 's' if pcnt > 1 else '' - line = "{} of {} SLURM '{}' job{} Killed".format(kcnt, pcnt, stat, s) - else: - line = "No SLURM '{}' job found to kill".format(stat) - line += " in Partition '{}'".format(part) - if uname: line += " for " + uname - self.pglog(line, self.LOGWRN) - + # kill a pbs batch job def rdakill_pbs_batch(self, bid): ret = 0 @@ -188,7 +145,7 @@ def rdakill_pbs_batch(self, bid): self.pglog("{}: cannot find PBS batch ID".format(bid), self.LOGERR) if not ret and self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.LGEREX) return ret - + # kill PBS batch jobs for given status def rdakill_pbs_status(self, stat, queue, uname): if not queue: queue = 'rda' diff --git a/src/rda_python_miscs/rdakill.usg b/src/rda_python_miscs/rdakill.usg index 967e979..2a1430c 100644 --- a/src/rda_python_miscs/rdakill.usg 
+++ b/src/rda_python_miscs/rdakill.usg @@ -1,7 +1,7 @@ Kill one of multiple processes and theirs children for given local process ID or - other process information; kill one or multiple SLURM/PBS batch jobs for give batch - Job ID or Status. For killing SLURM/PBS batch jobs, you must login to cheyenne/caser + other process information; kill one or multiple PBS batch jobs for given batch + Job ID or Status. For killing PBS batch jobs, you must log in to casper login nodes. Usage: rdakill [-h HostName] [-p ProcessID] [-P ParentProcessID] \ @@ -10,13 +10,13 @@ - Option -a, application name of the process; - Option -h, hostname the process is on. Omit it for local process, - but it is mandatory if the process id is a SLURM/PBS bactch id. + but it is mandatory if the process id is a PBS batch id. - Option -p, the process id or batch job id to be stopped. - Option -P, the parent process id; - - Option -q, the SLURM Partition or PBS queue name. It defaults to 'rda'; + - Option -q, the PBS queue name. 
It defaults to 'rda'; - Option -s, the Batch Job Status; this is mantatory if batch id is not provided; diff --git a/src/rda_python_miscs/rdaps.py b/src/rda_python_miscs/rdaps.py index b1f606f..3261318 100644 --- a/src/rda_python_miscs/rdaps.py +++ b/src/rda_python_miscs/rdaps.py @@ -64,10 +64,7 @@ def start_actions(self): chkloc = 1 if self.RDAPS['h']: self.local_host_action(self.RDAPS['h'], "check processes", self.PGLOG['HOSTNAME'], self.LGEREX) - if not self.pgcmp(self.RDAPS['h'], self.PGLOG['SLMNAME'], 1): - self.slurm_snapshot() - chkloc = 0 - elif not self.pgcmp(self.RDAPS['h'], self.PGLOG['PBSNAME'], 1): + if not self.pgcmp(self.RDAPS['h'], self.PGLOG['PBSNAME'], 1): self.pbs_snapshot() chkloc = 0 if chkloc: self.process_snapshot() @@ -96,40 +93,7 @@ def process_snapshot(self): if self.RDAPS['P'] and self.RDAPS['P'] != ppid: continue if self.RDAPS['a'] and aname.find(self.RDAPS['a']) < 0: continue self.pglog(re.sub(r' +', ' ', line), self.LOGWRN) - - # get a snapshot of a SLURM batch process status - def slurm_snapshot(self): - qopts = '' - if self.RDAPS['u']: qopts += " -u " + self.RDAPS['u'] - if self.RDAPS['p']: - qopts += " -j {}".format(self.RDAPS['p']) - else: - qopts = " -p rda" - cmd = "squeue -l" + qopts - buf = self.pgsystem(cmd, self.LOGWRN, 272) - if not buf: - if self.PGLOG['SYSERR'] and self.PGLOG['SYSERR'].find('Invalid job id specified') < 0: - self.pglog(self.PGLOG['SYSERR'], self.LGEREX) - return - lines = re.split(r'\n', buf) - lcnt = len(lines) - if lcnt < 3: return - dochk = 1 - for line in lines: - if not line: continue - if dochk: - if re.match(r'^\s*JOBID\s', line): dochk = 0 - else: - vals = re.split(r'\s+', self.pgtrim(line)) - if self.RDAPS['a'] and vals[2] and self.RDAPS['a'] != vals[2]: continue - # move user name to front - val = vals[3] - vals[3] = vals[2] - vals[2] = vals[1] - vals[1] = vals[0] - vals[0] = val - self.pglog(' '.join(vals), self.LOGWRN) - + # get a snapshot of a PBS batch process status def pbs_snapshot(self): 
qopts = '' @@ -138,7 +102,7 @@ def pbs_snapshot(self): if self.RDAPS['p']: if qopts: qopts += ' ' qopts += str(self.RDAPS['p']) - if not qopts: qopts = 'rda' + if not qopts: qopts = 'gdex' stat = self.get_pbs_info(qopts, 1, self.LOGWRN) if not stat: if self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.LGEREX) diff --git a/src/rda_python_miscs/rdaps.usg b/src/rda_python_miscs/rdaps.usg index 5caf1d9..c85a20c 100644 --- a/src/rda_python_miscs/rdaps.usg +++ b/src/rda_python_miscs/rdaps.usg @@ -7,7 +7,7 @@ - Option -a, application name of the process; - Option -h, hostname the process is on; omit it for local process, - but it is mandatory if the process id is a SLURM/PBS bactch id. + but it is mandatory if the process id is a PBS batch id. - Option -p, the local process or batch job id to be checked;