Skip to content

Commit a2f2e87

Browse files
authored
linstor: improve heartbeat check by also asking the Linstor controller (#10105)
If a node doesn't have a DRBD connection to another node, additionally ask the Linstor controller whether that node is alive. Previously the check simply answered "no" in that situation, even though the node might still be alive. This is always the case in a non-hyperconverged setup.
1 parent 8639ba8 commit a2f2e87

File tree

3 files changed

+57
-5
lines changed

3 files changed

+57
-5
lines changed

plugins/storage/volume/linstor/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ All notable changes to Linstor CloudStack plugin will be documented in this file
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [2024-12-13]
9+
10+
### Fixed
11+
12+
- Linstor heartbeat check now also asks the linstor-controller if there is no connection between nodes
13+
814
## [2024-10-28]
915

1016
### Fixed

plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStorageAdaptor.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import com.linbit.linstor.api.DevelopersApi;
4646
import com.linbit.linstor.api.model.ApiCallRc;
4747
import com.linbit.linstor.api.model.ApiCallRcList;
48+
import com.linbit.linstor.api.model.Node;
4849
import com.linbit.linstor.api.model.Properties;
4950
import com.linbit.linstor.api.model.ProviderKind;
5051
import com.linbit.linstor.api.model.Resource;
@@ -712,4 +713,19 @@ public long getUsed(LinstorStoragePool pool) {
712713
throw new CloudRuntimeException(apiEx.getBestMessage(), apiEx);
713714
}
714715
}
716+
717+
public boolean isNodeOnline(LinstorStoragePool pool, String nodeName) {
718+
DevelopersApi linstorApi = getLinstorAPI(pool);
719+
try {
720+
List<Node> node = linstorApi.nodeList(Collections.singletonList(nodeName), Collections.emptyList(), null, null);
721+
if (node == null || node.isEmpty()) {
722+
return false;
723+
}
724+
725+
return Node.ConnectionStatusEnum.ONLINE.equals(node.get(0).getConnectionStatus());
726+
} catch (ApiException apiEx) {
727+
s_logger.error(apiEx.getMessage());
728+
throw new CloudRuntimeException(apiEx.getBestMessage(), apiEx);
729+
}
730+
}
715731
}

plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -279,22 +279,52 @@ private String executeDrbdSetupStatus(OutputInterpreter.AllLinesParser parser) {
279279
return sc.execute(parser);
280280
}
281281

282+
private boolean checkLinstorNodeOnline(String nodeName) {
283+
return ((LinstorStorageAdaptor)_storageAdaptor).isNodeOnline(this, nodeName);
284+
}
285+
286+
/**
287+
* Checks output of drbdsetup status output if this node has any valid connection to the specified
288+
* otherNodeName.
289+
* If there is no connection, ask the Linstor controller if the node is seen online and return false if not.
290+
* If there is a connection but not connected(valid) return false.
291+
* @param output Output of the drbdsetup status --json command
292+
* @param otherNodeName Name of the node to check against
293+
* @return true if we could say that this node thinks the node in question is reachable, otherwise false.
294+
*/
282295
private boolean checkDrbdSetupStatusOutput(String output, String otherNodeName) {
283296
JsonParser jsonParser = new JsonParser();
284297
JsonArray jResources = (JsonArray) jsonParser.parse(output);
298+
boolean connectionFound = false;
285299
for (JsonElement jElem : jResources) {
286300
JsonObject jRes = (JsonObject) jElem;
287301
JsonArray jConnections = jRes.getAsJsonArray("connections");
288302
for (JsonElement jConElem : jConnections) {
289303
JsonObject jConn = (JsonObject) jConElem;
290-
if (jConn.getAsJsonPrimitive("name").getAsString().equals(otherNodeName)
291-
&& jConn.getAsJsonPrimitive("connection-state").getAsString().equalsIgnoreCase("Connected")) {
292-
return true;
304+
if (jConn.getAsJsonPrimitive("name").getAsString().equals(otherNodeName))
305+
{
306+
connectionFound = true;
307+
if (jConn.getAsJsonPrimitive("connection-state").getAsString()
308+
.equalsIgnoreCase("Connected")) {
309+
return true;
310+
}
293311
}
294312
}
295313
}
296-
s_logger.warn(String.format("checkDrbdSetupStatusOutput: no resource connected to %s.", otherNodeName));
297-
return false;
314+
boolean otherNodeOnline = false;
315+
if (connectionFound) {
316+
s_logger.warn(String.format(
317+
"checkingHeartBeat: connection found, but not in state 'Connected' to %s", otherNodeName));
318+
} else {
319+
s_logger.warn(String.format(
320+
"checkingHeartBeat: no resource connected to %s, checking LINSTOR", otherNodeName));
321+
otherNodeOnline = checkLinstorNodeOnline(otherNodeName);
322+
}
323+
s_logger.info(String.format(
324+
"checkingHeartBeat: other node %s is %s.",
325+
otherNodeName,
326+
otherNodeOnline ? "online on controller" : "down"));
327+
return otherNodeOnline;
298328
}
299329

300330
private String executeDrbdEventsNow(OutputInterpreter.AllLinesParser parser) {

0 commit comments

Comments
 (0)