From d206f0bd376451a61d161a49072c3b10062681fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Enrique=20Guti=C3=A9rrez=20Maz=C3=B3n?= Date: Wed, 14 Feb 2024 13:35:42 +0100 Subject: [PATCH] vdsm: get gluster volume info from any gluster peer The function _get_gluster_volinfo queries the glusterfs volume info from the storage server. This is translated to the gluster client by adding the parameter --remote-host, which limits the query to one server, so we are turning the storage server into a single point of failure: if it is not available, it can lead to a cluster outage. The proposed change lets the gluster cli use any available gluster peer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: José Enrique Gutiérrez Mazón --- lib/vdsm/storage/storageServer.py | 18 ++++++++++++--- tests/storage/storageserver_test.py | 34 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/lib/vdsm/storage/storageServer.py b/lib/vdsm/storage/storageServer.py index 3585ba8bef..3461bfee4d 100644 --- a/lib/vdsm/storage/storageServer.py +++ b/lib/vdsm/storage/storageServer.py @@ -387,9 +387,21 @@ def _get_gluster_volinfo(self): volinfo = superVdsmProxy.glusterVolumeInfo(self._volname, self._volfileserver) return volinfo[self._volname] - except ge.GlusterCmdExecFailedException as e: - log.warning("Failed to get volume info: %s", e) - return {} + except ge.GlusterException as e: + # The remote host may be down. + # If we are running on a hyperconverged system the gluster client + # can use one of the other gluster servers. 
+ log.info("Failed to get volume info from remote server %s: %s", + self._volfileserver, e) + log.debug("Trying to get volume info from backup servers: %s", + self._options) + try: + volinfo = superVdsmProxy.glusterVolumeInfo(self._volname) + return volinfo[self._volname] + except ge.GlusterException as e: + log.warning( + "Failed to get volume info from backup servers: %s", e) + return {} class NFSConnection(Connection): diff --git a/tests/storage/storageserver_test.py b/tests/storage/storageserver_test.py index 0932080f8b..3fba78ac49 100644 --- a/tests/storage/storageserver_test.py +++ b/tests/storage/storageserver_test.py @@ -250,6 +250,40 @@ def test_glusterfs_cli_missing(self, monkeypatch): gluster = GlusterFSConnection(id="id", spec="192.168.122.1:/music") assert gluster.options == "" + @pytest.mark.parametrize("userMountOptions", [ + '', + 'backup-volfile-servers=192.168.122.1:192.168.122.2', + ]) + def test_glusterfs_retry_withotut_volfile_server(self, monkeypatch, + userMountOptions): + """ + The fake glusterVolumeInfo fails when called with a volfileserver, + simulating the case when a remote server is down. If the caller tries + again without volfileserver the call will succeed, simulating the case + of a hyperconverged system where the local gluster client can use one + of the connected servers. 
+ """ + monkeypatch.setattr(storageServer, 'supervdsm', FakeSupervdsm()) + monkeypatch.setattr(gluster_cli, 'exists', lambda: True) + + def glusterVolumeInfo(volname=None, volfileServer=None): + assert volname == "music" + if volfileServer is not None: + raise ge.GlusterException() + return {'music': {'brickCount': '2', + 'bricks': ['192.168.122.1:/tmp/music', + '192.168.122.2:/tmp/music'] + } + } + storageServer.supervdsm.glusterVolumeInfo = glusterVolumeInfo + gluster = GlusterFSConnection(id="id", spec="192.168.122.3:/music", + options=userMountOptions) + expected_volinfo = {'brickCount': '2', + 'bricks': ['192.168.122.1:/tmp/music', + '192.168.122.2:/tmp/music'] + } + assert gluster.volinfo == expected_volinfo + class TestGlusterFSNotAccessibleConnection: