Skip to content

Commit 20c61e3

Browse files
committed
BF: CS-1288: Core binding (explicit strategy) does not reserve cores correctly
1 parent 17e9190 commit 20c61e3

File tree

1 file changed

+31
-14
lines changed

1 file changed

+31
-14
lines changed

source/libs/sgeobj/sge_binding.cc

Lines changed: 31 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,8 @@
3535
/*___INFO__MARK_END__*/
3636

3737
#include "uti/sge_binding_hlp.h"
38+
#include "uti/sge_log.h"
39+
#include "uti/sge_rmon_macros.h"
3840

3941
#if defined(BINDING_SOLARIS)
4042
# include <sys/processor.h>
@@ -1270,10 +1272,12 @@ static bool account_job_on_topology(char** topology, const int topology_length,
12701272
* SEE ALSO
12711273
* ???/???
12721274
*******************************************************************************/
1273-
bool binding_explicit_check_and_account(const int* list_of_sockets, const int samount,
1274-
const int* list_of_cores, const int score, char** topo_used_by_job,
1275-
int* topo_used_by_job_length)
1275+
bool
1276+
binding_explicit_check_and_account(const int* list_of_sockets, const int samount,
1277+
const int* list_of_cores, const int score,
1278+
char** topo_used_by_job, int* topo_used_by_job_length)
12761279
{
1280+
DENTER(TOP_LAYER);
12771281
int i;
12781282

12791283
/* position of <socket>,<core> in topology string */
@@ -1282,8 +1286,7 @@ bool binding_explicit_check_and_account(const int* list_of_sockets, const int sa
12821286
bool possible = true;
12831287

12841288
/* input parameter validation */
1285-
if (samount != score || samount <= 0 || list_of_sockets == nullptr
1286-
|| list_of_cores == nullptr) {
1289+
if (samount != score || samount <= 0 || list_of_sockets == nullptr || list_of_cores == nullptr) {
12871290
return false;
12881291
}
12891292

@@ -1295,28 +1298,39 @@ bool binding_explicit_check_and_account(const int* list_of_sockets, const int sa
12951298
return false;
12961299
}
12971300
}
1301+
1302+
DPRINTF("binding_explicit_check_and_account: logical_used_topology=%s; %d\n", logical_used_topology, logical_used_topology_length);
12981303

12991304
/* create output string */
13001305
get_execd_topology(topo_used_by_job, topo_used_by_job_length);
13011306

1307+
DPRINTF("binding_explicit_check_and_account: topo_used_by_job=%s; %d\n", *topo_used_by_job, *topo_used_by_job_length);
1308+
13021309
/* go through the <socket>,<core> pair list */
13031310
for (i = 0; i < samount; i++) {
13041311

1312+
DPRINTF("binding_explicit_check_and_account: checking socket %d, core %d\n", list_of_sockets[i], list_of_cores[i]);
1313+
13051314
/* get position in topology string */
1306-
if ((pos = get_position_in_topology(list_of_sockets[i], list_of_cores[i],
1307-
logical_used_topology, logical_used_topology_length)) < 0) {
1308-
/* the <socket>,<core> does not exist */
1309-
possible = false;
1310-
break;
1311-
}
1315+
if ((pos = get_position_in_topology(list_of_sockets[i], list_of_cores[i],
1316+
logical_used_topology, logical_used_topology_length)) < 0) {
1317+
/* the <socket>,<core> does not exist */
1318+
DPRINTF("binding_explicit_check_and_account: position in logical_used_topology not found for socket %d, core %d\n", list_of_sockets[i], list_of_cores[i]);
1319+
possible = false;
1320+
break;
1321+
}
1322+
1323+
DPRINTF("binding_explicit_check_and_account: position in logical_used_topology string is %d\n", pos);
13121324

13131325
/* check if this core is available (DG TODO introduce threads) */
13141326
if (logical_used_topology[pos] == 'C') {
13151327
/* do temporarily account it */
13161328
(*topo_used_by_job)[pos] = 'c';
13171329
/* thread binding: account threads here */
13181330
account_all_threads_after_core(topo_used_by_job, pos);
1331+
DPRINTF("topo_used_by_job after accounting found core at pos %d: %s\n", pos, *topo_used_by_job);
13191332
} else {
1333+
DPRINTF("binding_explicit_check_and_account: core at position %d is not available\n", pos);
13201334
/* core not usable -> early abort */
13211335
possible = false;
13221336
break;
@@ -1327,10 +1341,13 @@ bool binding_explicit_check_and_account(const int* list_of_sockets, const int sa
13271341
if (possible) {
13281342
if (!account_job_on_topology(&logical_used_topology, logical_used_topology_length,
13291343
*topo_used_by_job, *topo_used_by_job_length)) {
1344+
DPRINTF("binding_explicit_check_and_account: accounting on logical_used_topology failed\n");
13301345
possible = false;
13311346
}
13321347
}
13331348

1349+
DPRINTF("binding_explicit_check_and_account: logical_used_topology after accounting: %s\n", logical_used_topology);
1350+
13341351
/* free memory when unsuccessful */
13351352
if (!possible) {
13361353
sge_free(topo_used_by_job);
@@ -3266,16 +3283,16 @@ static int get_position_in_topology(const int socket, const int core,
32663283
}
32673284

32683285
for (i = 0; i < topology_length && topology[i] != '\0'; i++) {
3269-
if (topology[i] == 'S') {
3286+
if (topology[i] == 'S' || topology[i] == 's') {
32703287
/* we've got a new socket */
32713288
s++;
32723289
/* invalidate core counter */
32733290
c = -1;
3274-
} else if (topology[i] == 'C') {
3291+
} else if (topology[i] == 'C' || topology[i] == 'c') {
32753292
/* we've got a new core */
32763293
c++;
32773294
/* invalidate thread counter */
3278-
} else if (topology[i] == 'T') {
3295+
} else if (topology[i] == 'T' || topology[i] == 't') {
32793296
/* we've got a new thread */
32803297
}
32813298
/* check if we are at the position seeking for */

0 commit comments

Comments
 (0)