Skip to content

Commit 237c8cb

Browse files
committed
BF: CS-1594 closing all file descriptors (e.g. in sge_execd / sge_shepherd) depends on the open files limit and can take quite some time
1 parent db409cd commit 237c8cb

File tree

4 files changed

+323
-36
lines changed

4 files changed

+323
-36
lines changed

source/libs/uti/sge_os.cc

Lines changed: 141 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,13 @@
2727
*
2828
* All Rights Reserved.
2929
*
30-
* Portions of this software are Copyright (c) 2023-2024 HPC-Gridware GmbH
30+
* Portions of this software are Copyright (c) 2023-2025 HPC-Gridware GmbH
3131
*
3232
************************************************************************/
3333
/*___INFO__MARK_END__*/
34+
#include <filesystem>
35+
#include <string>
36+
3437
#include <cstdio>
3538
#include <cstdlib>
3639
#include <cstring>
@@ -39,6 +42,7 @@
3942
#include <cctype>
4043
#include <fcntl.h>
4144
#include <sys/stat.h>
45+
#include <dirent.h>
4246

4347
#ifdef SIGTSTP
4448

@@ -63,8 +67,6 @@
6367

6468
#include "msg_common.h"
6569

66-
static int fd_compare(const void *fd1, const void *fd2);
67-
6870
static void sge_close_fd(int fd);
6971

7072
/****** uti/os/sge_get_pids() *************************************************
@@ -378,6 +380,7 @@ int sge_get_max_fd() {
378380
return sysconf(_SC_OPEN_MAX);
379381
}
380382

383+
#if not defined(LINUX) && not defined(SOLARIS)
381384
/****** uti/os/fd_compare() ****************************************************
382385
* NAME
383386
* fd_compare() -- file descriptor compare function for qsort()
@@ -431,7 +434,80 @@ static int fd_compare(const void *fd1, const void *fd2) {
431434
}
432435
return 0;
433436
}
437+
#endif
438+
439+
#if defined(LINUX) || defined(SOLARIS)
440+
/**
441+
* @brief Get all open file descriptors for a process
442+
*
443+
* Retrieves all currently open file descriptors for the specified process
444+
* by reading the /proc/[pid]/fdinfo directory (or /proc/self/fdinfo if pid is 0).
445+
* The function uses opendir() to iterate over the directory entries and
446+
* automatically excludes the file descriptor used by the directory stream itself.
447+
*
448+
* @param pid Process id (0 for current process/self)
449+
* @return std::set<int> Set containing all open file descriptor numbers
450+
* @note MT-SAFE: get_all_fds() is MT safe
451+
* @see sge_close_all_fds()
452+
*/
453+
std::set<int> get_all_fds(pid_t pid) {
454+
DENTER(TOP_LAYER);
434455

456+
std::set<int> fds;
457+
458+
std::filesystem::path proc_path = "/proc/";
459+
if (pid == 0) {
460+
proc_path += "self/fdinfo";
461+
} else {
462+
proc_path += std::to_string(pid) + "/fdinfo";
463+
}
464+
#if 0
465+
// Unfortunately, we cannot use the C++ directory_iterator for iterating over /proc/*/fdinfo:
466+
// It opens itself a file handle which shows up in the /proc/*/fdinfo - but we cannot figure
467+
// out which one belongs to the iterator!
468+
// With opendir() we can use dirfd() to get the fd of the directory stream itself and can ignore it.
469+
try {
470+
std::filesystem::directory_iterator iter{proc_path};
471+
for (const auto &entry : iter) {
472+
int fd = std::stoi(entry.path().filename());
473+
fds.insert(fd);
474+
}
475+
} catch (std::filesystem::filesystem_error &e) {
476+
// this should never happen
477+
}
478+
#else
479+
DIR *cwd = opendir(proc_path.c_str());
480+
if (cwd == nullptr) {
481+
DPRINTF("get_all_fds(): cannot open directory %s: %s", proc_path.c_str(), strerror(errno));
482+
} else {
483+
// Iterate over the directory stream.
484+
// Do not use readdir.2 or readdir_r - they are deprecated.
485+
// See readdir.3 man page.
486+
int cwd_fd = dirfd(cwd);
487+
dirent *dent;
488+
while ((dent = readdir(cwd)) != nullptr) {
489+
if (dent->d_name[0] == '\0') {
490+
DPRINTF("get_all_fds(): empty filename in directory %s", proc_path.c_str());
491+
} else if (strcmp(dent->d_name, "..") == 0 || strcmp(dent->d_name, ".") == 0) {
492+
DPRINTF("get_all_fds(): skipping %s", dent->d_name);
493+
} else {
494+
int fd = std::stoi(dent->d_name);
495+
if (fd == cwd_fd) {
496+
DPRINTF("get_all_fds(): skipping opendir() internal fd %d", fd);
497+
} else {
498+
// This is a valid fd, store it in the returned set.
499+
fds.insert(fd);
500+
}
501+
}
502+
}
503+
closedir(cwd);
504+
}
505+
506+
#endif
507+
508+
DRETURN(fds);
509+
}
510+
#endif
435511

436512
/****** uti/os/sge_close_fd() **************************************************
437513
* NAME
@@ -455,43 +531,74 @@ static int fd_compare(const void *fd1, const void *fd2) {
455531
* uti/os/sge_close_all_fds()
456532
*******************************************************************************/
457533
static void sge_close_fd(int fd) {
534+
DENTER(TOP_LAYER);
458535
#ifdef __INSURE__
459536
if (_insure_is_internal_fd(fd)) {
460537
return;
461538
}
462539
#endif
463-
close(fd);
540+
#if 1
541+
if (close(fd) == -1) {
542+
DPRINTF("close(%d) failed: %s\n", fd, strerror(errno));
543+
}
544+
#else
545+
// @todo When closing all fds is called for fork()/exec(), we could just set the FD_CLOEXEC flag
546+
// on the file descriptor. This will not close the fd, but just mark it to be closed on any
547+
// of the various exec() calls.
548+
// A short test showed that setting the flag instead of closing the fd is about 30% faster.
549+
// It would be worth doing only if the caller has a high number of file descriptors open,
550+
// which is not the case on the execution side, but might be in sge_qmaster,
551+
// e.g., when starting a JSV script in a big cluster.
552+
if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
553+
DPRINTF("fcntl(%d, F_SETFD, FD_CLOEXEC) failed: %s\n", fd, strerror(errno));
554+
}
555+
#endif
556+
DRETURN_VOID;
557+
}
558+
559+
/**
560+
* @brief close all open file descriptors
561+
*
562+
* This function is used to close all open file descriptors for
563+
* the current process.
564+
*
565+
* It is possible to pass a list (int array) of file descriptors which shall
566+
* be kept open.
567+
*
568+
* The algorithm used depends on the operating system:
569+
* * On Linux and Solaris we can figure out the actually open file descriptors
570+
* by looping over all files in `/proc/self/fdinfo` and closing them.
571+
* * On other OSes we loop from 0 to the maximum possible file descriptor
572+
* and (try to) close them.
573+
* * On newer Linux versions we could possibly use the `close_range()` function,
574+
* ideally using the flag CLOSE_RANGE_CLOEXEC which will not immediately close
575+
* the fd but just mark it to be closed on `exec()` calls - which is
576+
* a situation where we call `close_all_fds()`.
577+
* But it is not yet available on our default build platform, CentOS 8.
578+
*
579+
* @param keep_open - optionally: int array of file descriptors to keep open
580+
* @param nr_of_keep_open_entries - number of entries in keep_open
581+
* @todo What about fcntl(fd, F_SETFD, FD_CLOEXEC)? See comment in sge_close_fd().
582+
*/
583+
#if defined(LINUX) || defined(SOLARIS)
584+
static bool keep_fd_open(int *keep_open, unsigned long nr_of_keep_open_entries, int fd) {
585+
for (unsigned long keep_open_array_index = 0; keep_open_array_index < nr_of_keep_open_entries; keep_open_array_index++) {
586+
if (keep_open[keep_open_array_index] == fd) {
587+
return true;
588+
}
589+
}
590+
return false;
464591
}
465592

466-
/****** uti/os/sge_close_all_fds() *********************************************
467-
* NAME
468-
* sge_close_all_fds() -- close all open file descriptors
469-
*
470-
* SYNOPSIS
471-
* void sge_close_all_fds(int* keep_open, unsigned long nr_of_fds)
472-
*
473-
* FUNCTION
474-
* This function is used to close all possible open file descriptors for
475-
* the current process. This is done by getting the max. possible file
476-
* descriptor count and looping over all file descriptors and calling
477-
* sge_close_fd().
478-
*
479-
* It is possible to specify a file descriptor set which should not be
480-
* closed.
481-
*
482-
* INPUTS
483-
* int* keep_open - integer array which contains file descriptor
484-
* ids which should not be closed
485-
* - if this value is set to nullptr nr_of_fds is
486-
* ignored
487-
* unsigned long nr_of_fds - nr of filedescriptors in the keep_open array
488-
*
489-
* RESULT
490-
* void - no result
491-
*
492-
* SEE ALSO
493-
* uti/os/sge_close_fd()
494-
*******************************************************************************/
593+
void sge_close_all_fds(int *keep_open, unsigned long nr_of_keep_open_entries) {
594+
std::set all_fds = get_all_fds();
595+
for (auto fd : all_fds) {
596+
if (keep_open == nullptr || !keep_fd_open(keep_open, nr_of_keep_open_entries, fd)) {
597+
sge_close_fd(fd);
598+
}
599+
}
600+
}
601+
#else
495602
void sge_close_all_fds(int *keep_open, unsigned long nr_of_keep_open_entries) {
496603
int maxfd = sge_get_max_fd();
497604
if (keep_open == nullptr) {
@@ -533,6 +640,7 @@ void sge_close_all_fds(int *keep_open, unsigned long nr_of_keep_open_entries) {
533640
}
534641
}
535642
}
643+
#endif
536644

537645
/****** uti/os/sge_dup_fd_above_stderr() **************************************
538646
* NAME

source/libs/uti/sge_os.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@
3333
************************************************************************/
3434
/*___INFO__MARK_END__*/
3535

36-
#include <sys/time.h>
37-
#include <unistd.h>
36+
#include <set>
37+
38+
#include <sys/types.h>
3839

3940
#include "sge_getloadavg.h"
4041
#include "sge_loadmem.h"
@@ -73,3 +74,7 @@ int sge_get_max_fd();
7374
int sge_dup_fd_above_stderr(int *fd);
7475

7576
int sge_occupy_first_three();
77+
78+
#if defined(LINUX) || defined(SOLARIS)
79+
std::set<int> get_all_fds(pid_t pid = 0);
80+
#endif

test/libs/uti/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#___INFO__MARK_BEGIN_NEW__
22
###########################################################################
33
#
4-
# Copyright 2024 HPC-Gridware GmbH
4+
# Copyright 2024-2025 HPC-Gridware GmbH
55
#
66
# Licensed under the Apache License, Version 2.0 (the "License");
77
# you may not use this file except in compliance with the License.
@@ -67,6 +67,11 @@ if (WITH_MUNGE)
6767
add_test(NAME test_uti_munge COMMAND test_uti_munge)
6868
endif()
6969

70+
add_executable(test_uti_os_fds test_uti_os_fds.cc)
71+
target_include_directories(test_uti_os_fds PRIVATE "./")
72+
target_link_libraries(test_uti_os_fds PRIVATE uti commlists ${SGE_LIBS})
73+
add_test(NAME test_uti_os_fds COMMAND test_uti_os_fds)
74+
7075
add_executable(test_uti_profiling test_uti_profiling.cc)
7176
target_include_directories(test_uti_profiling PRIVATE "./")
7277
target_link_libraries(test_uti_profiling PRIVATE uti commlists ${SGE_LIBS})
@@ -124,6 +129,7 @@ if (INSTALL_SGE_TEST)
124129
if (WITH_MUNGE)
125130
install(TARGETS test_uti_munge DESTINATION testbin/${SGE_ARCH})
126131
endif()
132+
install(TARGETS test_uti_os_fds DESTINATION testbin/${SGE_ARCH})
127133
install(TARGETS test_uti_profiling DESTINATION testbin/${SGE_ARCH})
128134
install(TARGETS test_uti_recursive DESTINATION testbin/${SGE_ARCH})
129135
install(TARGETS test_uti_sl DESTINATION testbin/${SGE_ARCH})

0 commit comments

Comments
 (0)