Skip to content

Commit

Permalink
Sending SIGINT to server process if error occurs
Browse files Browse the repository at this point in the history
  • Loading branch information
DylanKierans committed Jul 16, 2024
1 parent dfe257e commit 3f9d1af
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 12 deletions.
42 changes: 40 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,51 @@ Using `PMPMEAS (Poor Man's Performance Measurement tool)` to interface with papi

## Installation

Install [`libotf2`](https://www.vi-hps.org/projects/score-p/), and [`zeromq`](https://github.com/zeromq) dependency. Then install `rTrace` from github with:
Install the required dependencies: [`libotf2`](https://www.vi-hps.org/projects/score-p/) and [`zeromq`](https://github.com/zeromq).

Optionally (but recommended), also install the [`papi`](https://hpc.llnl.gov/software/development-environment-software/papi-performance-application-programming-interface) dependency.

Then install `rTrace` from github with:

```R
install.packages("devtools") # if not yet installed
devtools::install_github("DylanKierans/rTrace")
```

Refer to the [Debugging](#debugging) section if you encounter errors.

### DEBUGGING

If dependencies are not found during installation, they are either not installed or installed in non-standard directories. The error looks like this:
```
configure: error: Unable to find FOO.h
ERROR: configuration failed for package ‘rTrace’
```

1. Error - `configure: error: Unable to find zmq.h`. Solution:

```R
devtools::install_github("DylanKierans/rTrace", configure.args="--with-zmq=/path/to/zeromq/directory")
```

2. Error - `configure: error: Unable to find papi.h`. Solution:

```R
devtools::install_github("DylanKierans/rTrace", configure.args="--with-papi=/path/to/papi/directory")
```

3. Error - `configure: error: Unable to find linux/perf_event.h`. Solution:

```R
devtools::install_github("DylanKierans/rTrace", configure.args="--with-perf=/path/to/perf/directory")
```

If you receive an error about `devtools` not being installed, install it first with:

```R
install.packages("devtools") # if not yet installed
```


## Usage

```R
Expand Down
2 changes: 1 addition & 1 deletion cleanup
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/bash
rm -f src/*.o src/*.so */*~ *~
rm -f src/pmpmeas/*.o src/pmpmeas/lib/*.o src/pmpmeas/src/lib/*.o
rm -f src/pmpmeas/src/lib/*.o
rm -f config.* src/Makevars src/config.h src/stamp-h1
74 changes: 65 additions & 9 deletions src/rTrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,12 @@ OTF2_GlobalDefWriter* global_def_writer;
OTF2_TimeStamp epoch_start, epoch_end; // OTF2_GlobalDefWriter_WriteClockProperties
OTF2_EvtWriter** evt_writers;

// ZeroMQ sockets
// ZeroMQ objects
bool IS_LOGGER=false; ///* Used by report_and_exit() and sigrtrace_handler() to select exit behaviour
void *context; ///* zmq context - clients and server
void *requester; ///* zmq socket - master(5555) and slaves(5559) (comm with responder for globalDefWriter)
void *pusher; ///* zmq socket - clients (comm with puller for EvtWriter)
void *puller; ///* zmq socket - comm with pusher for EvtWriter. NOTE(review): presumably server-side (was previously a local of run_EvtWriters_server()) - confirm

// Counters
const OTF2_StringRef OFFSET_NUM_STRINGREF=10; ///* Offset for NUM_STRINGREF to avoid overwriting
Expand All @@ -74,9 +75,13 @@ int locationRef=0; ///* LocationRef of current client proc
// PMPMEAS - Metric collection
bool COLLECT_METRICS = false;
int NUM_METRICS=0; ///* Number of metrics, only applicable if COLLECT_METRICS is defined/enabled
long long *pmpmeas_vals;
int pmpmeas_n;
OTF2_Type *typeIDs;
long long *pmpmeas_vals; ///* Array of counter values
int pmpmeas_n; ///* Number of counters
OTF2_Type *typeIDs; ///* OTF2 type of counters (equivalent to typeof(pmpmeas_vals))

// Other
pid_t child_pid; ///* Process ID returned by fork() for server
sighandler_t default_sigint_handler;


///////////////////////////////
Expand Down Expand Up @@ -126,12 +131,56 @@ static OTF2_FlushCallbacks flush_callbacks =
// TODO: Review usage of SIGHUP during R makeCluster()
// sighup_handler
// @description This was introduced due to R procs being sent SIGHUP during forking
void signal_hup_handler(int signal) {
void sighup_handler(int signal) {
    // SIGHUP is deliberately swallowed (no action taken); any other signal
    // that reaches this handler is re-raised so it is not silently lost.
    if (signal != SIGHUP) {
        raise(signal);
    }
}

// @TODO Cleanup zmq buffers correctly
// @note Forked server process belongs to same process group, so forked process should
//  be able to kill server with pid=0 also
// sigrtrace_handler
// @description Signal handler for SIGRTRACE, clean up and exit.
//  SIGRTRACE thrown by any proc (clients/server) when an error occurs in the
//  native C portion of the rTrace code.
//  Logger process (IS_LOGGER): appends a diagnostic line to the log file, closes
//  it, then sends SIGTERM to itself.
//  Any other process: prints a diagnostic to Rcout, forwards SIGRTRACE to
//  child_pid when it is non-zero, then aborts via Rcpp::stop().
// @param signal Signal number delivered to the handler; anything other than
//  SIGRTRACE is re-raised untouched
// NOTE(review): fopen/fprintf/fclose, Rcpp::Rcout and Rcpp::stop (which throws) are
//  not async-signal-safe; behaviour here relies on the signal arriving at a benign
//  point - confirm this is acceptable or defer the work out of the handler.
void sigrtrace_handler(int signal) {
    if (signal != SIGRTRACE) {
        raise(signal);
        return;
    }

    if (IS_LOGGER) {
        if (fp == NULL) { // Open log file if needed
            fp = fopen(log_filename, "a");
        }

        // fopen() may fail (bad path, permissions, fd exhaustion); never write
        // through a NULL stream - skip logging but still shut the logger down.
        if (fp != NULL) {
            // "pgid" below is the process group of the parent (getpgid(getppid()))
            fprintf(fp, "sigrtrace_handler: %d, pid: %d, ppid: %d, pgid: %d, child_pid: %d\n",
                    signal, getpid(), getppid(), getpgid(getppid()), child_pid);
            fprintf(fp, "CLOSING DUE TO SIGRTRACE\n");
            fclose(fp);
            fp = NULL; // do not leave a dangling FILE* behind
        }

        // Cleanup zmq - process pipeline then close
        //finalize_zmq_server();

        kill(getpid(), SIGTERM);
    } else {
        Rcpp::Rcout << "sigrtrace_handler: " << signal << ", pid: " << getpid() <<
            ", ppid: " << getppid() << ", child_pid: " << child_pid << "\n";

        // Cleanup zmq - process pipeline then close
        //finalize_zmq_client();

        // Forward the signal to the forked process recorded in child_pid, if any
        if (child_pid != 0) { kill(child_pid, SIGRTRACE); }
        Rcpp::stop("CLOSING DUE TO SIGRTRACE - check previous error message if sent from rTrace client\n");
        //kill(getpid(), SIGTERM);
    }
}

// sigint_handler
// @description Signal handler for SIGINT (Ctrl+C). Raises SIGRTRACE first, then
//  re-installs the SIGINT handler saved in default_sigint_handler and raises
//  SIGINT again so the saved handler gets to act on it.
// @param sig Signal number delivered to the handler; anything other than
//  SIGINT is re-raised untouched
void sigint_handler(int sig){
    // Not a SIGINT: pass it on and do nothing else
    if (sig != SIGINT) {
        raise(sig);
        return;
    }

    // Trigger the rTrace error/cleanup path
    raise(SIGRTRACE);

    // Reset to default handler and rethrow
    signal(SIGINT, default_sigint_handler);
    raise(SIGINT);
}

//////////////////////////////////////
// Spawn otf2 process, and give task list
Expand All @@ -151,7 +200,9 @@ RcppExport int init_otf2_logger(int max_nprocs, Rcpp::String archivePath,
bool flag_print_pids)
{
// TODO: Verify this acts as intended to save child proc
signal(SIGHUP, signal_hup_handler);
signal(SIGHUP, sighup_handler);
signal(SIGRTRACE, sigrtrace_handler);
default_sigint_handler = signal(SIGINT, sigint_handler);

// Set COLLECT_METRICS global on server and client before fork
if (collect_metrics){
Expand All @@ -161,7 +212,7 @@ RcppExport int init_otf2_logger(int max_nprocs, Rcpp::String archivePath,
}
COLLECT_METRICS = collect_metrics;

pid_t child_pid = fork();
child_pid = fork();
if (child_pid == (pid_t) -1 ){ // ERROR
report_and_exit("Forking logger process", NULL);
return(1);
Expand Down Expand Up @@ -1022,7 +1073,6 @@ void globalDefWriter_metrics_server()
// @param flag_log Log all events in log file
// and logged
void run_EvtWriters_server(bool flag_log){
void *puller; ///< Recv otf2 eventlog
void *new_proc_rep;
int zmq_ret, rc; // Debugging recv/sends and socket
Zmq_otf2_data buffer;
Expand Down Expand Up @@ -1224,6 +1274,10 @@ void report_and_exit(const char* msg, void *socket){
fprintf(fp, "File closing\n");
fclose(fp);

// Send signal to parent (master), then to self, to close
kill(getppid(), SIGRTRACE);
kill(getpid(), SIGRTRACE);

} else { // Print to Rcout (recommend using logfile for makeCluster)
FILE *fp;
char filename[20];
Expand All @@ -1241,7 +1295,9 @@ void report_and_exit(const char* msg, void *socket){
fprintf(fp, "ERROR INFO - Errno: %d\n", errno);
fclose(fp);

kill(0, SIGTERM);
// Send signal to all procs in group to close
//kill(0, SIGTERM);
kill(0, SIGRTRACE);
}

}
Expand Down
4 changes: 4 additions & 0 deletions src/rTrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

//#define DEBUG /* Uncomment to enable verbose debug info */
#define MAX_FUNCTION_NAME_LEN 40 // Max length of R function
#define SIGRTRACE SIGUSR1

// Different events during entry collection phase
typedef enum {
Expand Down Expand Up @@ -116,5 +117,8 @@ void set_collectMetrics(bool);

// Universal functions
OTF2_TimeStamp get_time();
void sighup_handler(int signal);
void sigrtrace_handler(int signal);
void sigint_handler(int signal);

#endif /* Include guards rTrace.h */

0 comments on commit 3f9d1af

Please sign in to comment.