1
1
#include " Timers.hxx"
2
2
3
+ #include " sdpb_util/Proc_Meminfo.hxx"
4
+
3
5
namespace
4
6
{
5
- // TODO move to separate file
7
// Express a byte count in gigabytes, where 1 GB is taken as 1024^3 bytes.
double to_GB(size_t bytes)
{
  // Dividing once by 1024^3 gives the same result as three successive
  // divisions by 1024, since every divisor is a power of two.
  constexpr double bytes_per_GB = 1024.0 * 1024.0 * 1024.0;
  const auto byte_count = static_cast<double>(bytes);
  return byte_count / bytes_per_GB;
}
12
+
13
+ // TODO print_statm() is currently unused
6
14
7
15
// /proc/self/statm displays the following quantities:
8
16
// size resident shared text lib data dt
@@ -18,100 +26,36 @@ namespace
18
26
El::Output (prefix, stats);
19
27
}
20
28
}
29
+ }
21
30
22
- // /proc/meminfo can be different on different OSs.
23
- // Usually (e.g. on CentOS) it looks like
24
- // MemTotal: 131189996 kB
25
- // MemFree: 24211752 kB
26
- // MemAvailable: 69487008 kB
27
- // ...
28
- // We print MemAvailable (RAM available for allocation)
29
- // and MemUsed defined as MemUsed = MemTotal - MemAvailable.
30
- // MemUsed is RAM that is occupied by all processes and cannot be released
31
- // (i.e. it doesn't include cache)
32
- void print_meminfo (const std::string &prefix)
33
- {
34
- const char *proc_meminfo_path = " /proc/meminfo" ;
35
- std::ifstream meminfo_file (proc_meminfo_path);
36
-
37
- if (!meminfo_file.good ())
38
- return ;
39
-
40
- const char *mem_total_prefix = " MemTotal:" ;
41
- const char *mem_available_prefix = " MemAvailable:" ;
42
- size_t memTotalKB = 0 ;
43
- size_t memAvailableKB = 0 ;
44
- std::string line;
45
- while (std::getline (meminfo_file, line))
46
- {
47
- std::istringstream iss (line);
48
- std::string name;
49
- size_t size;
50
- std::string kB ;
51
- if (iss >> name >> size >> kB )
52
- {
53
- if (kB != " kB" && kB != " KB" )
54
- {
55
- El::Output (proc_meminfo_path,
56
- " : expected \" kB\" at the end of line: " , line);
57
- return ;
58
- }
59
- if (name == mem_total_prefix)
60
- memTotalKB = size;
61
- else if (name == mem_available_prefix)
62
- memAvailableKB = size;
63
- if (memTotalKB > 0 && memAvailableKB > 0 )
64
- break ;
65
- }
66
- else
67
- {
68
- El::Output (proc_meminfo_path, " : cannot parse line: " , line);
69
- return ;
70
- }
71
- }
72
-
73
- if (memTotalKB == 0 )
74
- {
75
- El::Output (proc_meminfo_path, " : " , mem_total_prefix, " not found" );
76
- return ;
77
- }
78
- if (memAvailableKB == 0 )
79
- {
80
- El::Output (proc_meminfo_path, " : " , mem_available_prefix,
81
- " not found" );
82
- return ;
83
- }
84
- auto memAvailableGB = (double )memAvailableKB / 1024 / 1024 ;
85
- auto memUsedGB = (double )(memTotalKB - memAvailableKB) / 1024 / 1024 ;
86
- El::Output (prefix, " MemAvailable, GB: " , memAvailableGB);
87
- El::Output (prefix, " MemUsed, GB: " , memUsedGB);
88
- }
31
+ Timers::Timers (bool debug) : debug(debug)
32
+ {
33
+ MPI_Comm_split_type (MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0 , MPI_INFO_NULL,
34
+ &comm_shared_mem.comm );
35
+ }
89
36
90
- void print_debug_info (const std::string &name)
91
- {
92
- std::ostringstream ss;
93
- ss << El::mpi::Rank () << " " << name << " " ;
94
- auto prefix = ss.str ();
95
-
96
- print_statm (prefix);
97
-
98
- // /proc/meminfo is the same for all processes in node,
99
- // so we print it only for rank 0.
100
- // TODO: print meminfo for a first process of each node
101
- // (makes sense if RAM is not distributed equally among the nodes)
102
- if (El::mpi::Rank () == 0 )
103
- print_meminfo (prefix);
104
- }
37
+ Timers::~Timers () noexcept
38
+ {
39
+ try
40
+ {
41
+ if (debug)
42
+ print_max_mem_used ();
43
+ }
44
+ catch (...)
45
+ {
46
+ // destructors should never throw exceptions
47
+ }
105
48
}
106
49
107
- Timers::Timers (bool debug) : debug(debug) {}
108
50
// Append a new Timer stored under `prefix + name` and return a reference to
// it.  In debug mode, memory information is printed for that name first.
Timer &Timers::add_and_start(const std::string &name)
{
  const std::string timer_label = prefix + name;

  // Must happen before the new entry is stored: print_meminfo() checks
  // whether named_timers is still empty.
  if(debug)
    {
      print_meminfo(timer_label);
    }

  named_timers.emplace_back(timer_label, Timer());
  auto &newest_entry = named_timers.back();
  return newest_entry.second;
}
116
60
void Timers::write_profile (const std::filesystem::path &path) const
117
61
{
@@ -120,11 +64,11 @@ void Timers::write_profile(const std::filesystem::path &path) const
120
64
std::ofstream f (path);
121
65
122
66
f << " {" << ' \n ' ;
123
- for (auto it (begin ()); it != end ();)
67
+ for (auto it (named_timers. begin ()); it != named_timers. end ();)
124
68
{
125
69
f << " {\" " << it->first << " \" , " << it->second << " }" ;
126
70
++it;
127
- if (it != end ())
71
+ if (it != named_timers. end ())
128
72
{
129
73
f << " ," ;
130
74
}
@@ -139,13 +83,67 @@ void Timers::write_profile(const std::filesystem::path &path) const
139
83
}
140
84
int64_t Timers::elapsed_milliseconds (const std::string &s) const
141
85
{
142
- auto iter (std::find_if (rbegin (), rend (),
86
+ auto iter (std::find_if (named_timers. rbegin (), named_timers. rend (),
143
87
[&s](const std::pair<std::string, Timer> &timer) {
144
88
return timer.first == s;
145
89
}));
146
- if (iter == rend ())
90
+ if (iter == named_timers. rend ())
147
91
{
148
92
throw std::runtime_error (" Could not find timing for " + s);
149
93
}
150
94
return iter->second .elapsed_milliseconds ();
151
95
}
96
+
97
+ void Timers::print_max_mem_used () const
98
+ {
99
+ if (max_mem_used > 0 && !max_mem_used_name.empty ())
100
+ {
101
+ El::Output (El::mpi::Rank (), " max MemUsed: " , to_GB (max_mem_used),
102
+ " GB at \" " , max_mem_used_name, " \" " );
103
+ }
104
+ }
105
+
106
+ void Timers::print_meminfo (const std::string &name)
107
+ {
108
+ // Print data from /proc/meminfo only for a first rank of each node
109
+ if (comm_shared_mem.Rank () != 0 )
110
+ return ;
111
+
112
+ auto prefix = El::BuildString (El::mpi::Rank (), " " , name, " " );
113
+
114
+ // Print memory usage for the current node (from the first rank).
115
+ // If we cannot parse /proc/meminfo, then simply print timer name.
116
+
117
+ if (!can_read_meminfo)
118
+ {
119
+ El::Output (prefix);
120
+ return ;
121
+ }
122
+
123
+ bool result;
124
+ constexpr bool print_error_msg = true ;
125
+ const auto meminfo = Proc_Meminfo::try_read (result, print_error_msg);
126
+ if (!result)
127
+ {
128
+ can_read_meminfo = false ;
129
+ El::Output (" Printing RAM usage will be disabled." );
130
+ El::Output (prefix);
131
+ return ;
132
+ }
133
+
134
+ // MemTotal is constant, thus we print it only once, when adding first timer
135
+ if (named_timers.empty ())
136
+ {
137
+ El::Output (prefix, " --- MemTotal: " , to_GB (meminfo.mem_total ), " GB" );
138
+ }
139
+
140
+ // Print MemUsed each time
141
+ El::Output (prefix, " --- MemUsed: " , to_GB (meminfo.mem_used ()), " GB" );
142
+
143
+ // Update max MemUsed info
144
+ if (meminfo.mem_used () > max_mem_used)
145
+ {
146
+ max_mem_used = meminfo.mem_used ();
147
+ max_mem_used_name = name;
148
+ }
149
+ }
0 commit comments