
Commit

WIP test RPU
FIXME: had to keep a copy of perform_upgrade() after its functionality
was moved to the vm_booted_with_installer fixture; a proper solution for
that still needs to be found
ydirson committed Sep 18, 2024
1 parent 020c7ae commit a737cc5
Showing 4 changed files with 177 additions and 15 deletions.
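
One possible direction for the FIXME in the commit message (a minimal sketch, not part of this commit): since perform_upgrade() keeps the yield it inherited from the fixture, calling it as a plain function only creates a generator and runs nothing, so the shared logic could instead live in a context manager usable both by the vm_booted_with_installer fixture and by test_pool_rpu. The helpers upload_iso_to_sr, boot_and_wait_for_ssh and remove_iso_from_sr named below are hypothetical stand-ins for the code currently inlined in perform_upgrade():

import contextlib

@contextlib.contextmanager
def installer_session(*, iso, host_vm):
    # upload the installer ISO and attach it to the host VM (hypothetical helper)
    remote_iso = upload_iso_to_sr(iso)
    host_vm.insert_cd(os.path.basename(remote_iso))
    try:
        # boot on the installer and wait for SSH to come up (hypothetical helper)
        boot_and_wait_for_ssh(host_vm)
        yield host_vm
        # normal exit: power the VM off before cleanup
        poweroff(host_vm.ip)
        wait_for(host_vm.is_halted, "Wait for host VM halted")
    finally:
        host_vm.eject_cd()
        remove_iso_from_sr(remote_iso)   # hypothetical helper

The fixture would then reduce to a with-block that yields the VM, and test_pool_rpu could wrap each host upgrade step in the same with-block instead of calling a copied generator.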
68 changes: 67 additions & 1 deletion lib/installer.py
@@ -1,10 +1,14 @@
import logging
import os
import time
import xml.etree.ElementTree as ET

from lib.commands import ssh, SSHCommandFailed
from lib import pxe
from lib.commands import local_cmd, scp, ssh, SSHCommandFailed
from lib.common import wait_for

from data import ISOSR_SRV, ISOSR_PATH

class AnswerFile:
    def __init__(self, kind, /):
        from data import BASE_ANSWERFILES
@@ -128,6 +132,68 @@ def monitor_upgrade(*, ip):
                         ).returncode == 1,
             "Wait for installer to terminate")

# FIXME essentially duplicates vm_booted_with_installer fixture
def perform_upgrade(*, iso, host_vm):
    vif = host_vm.vifs()[0]
    mac_address = vif.param_get('MAC')
    logging.info("Host VM has MAC %s", mac_address)

    # unique filename on server, has to work on FreeBSD-based NAS
    # too, and even v14 has no tool allowing mktemp suffixes
    remote_iso = ssh(ISOSR_SRV,
                     ["python3", "-c",
                      '"import os, tempfile; '
                      f"f = tempfile.mkstemp(suffix='.iso', dir='{ISOSR_PATH}')[1];"
                      "os.chmod(f, 0o644);"
                      'print(f);"'
                      ])
    logging.info("Uploading to ISO-SR %s as %s", iso, os.path.basename(remote_iso))
    try:
        scp(ISOSR_SRV, iso, remote_iso)
        # FIXME: run sr-scan
        host_vm.insert_cd(os.path.basename(remote_iso))

        try:
            pxe.arp_clear_for(mac_address)

            host_vm.start()
            wait_for(host_vm.is_running, "Wait for host VM running")

            # catch host-vm IP address
            wait_for(lambda: pxe.arp_addresses_for(mac_address),
                     "Wait for DHCP server to see Host VM in ARP tables",
                     timeout_secs=10 * 60)
            ips = pxe.arp_addresses_for(mac_address)
            logging.info("Host VM has IPs %s", ips)
            assert len(ips) == 1
            host_vm.ip = ips[0]

            # host may not be up if ARP cache was filled
            wait_for(lambda: local_cmd(["ping", "-c1", host_vm.ip], check=False),
                     "Wait for host up", timeout_secs=10 * 60, retry_delay_secs=10)
            wait_for(lambda: local_cmd(["nc", "-zw5", host_vm.ip, "22"], check=False),
                     "Wait for ssh up on host", timeout_secs=10 * 60, retry_delay_secs=5)

            yield host_vm

            logging.info("Shutting down Host VM")
            poweroff(host_vm.ip)
            wait_for(host_vm.is_halted, "Wait for host VM halted")

        except Exception as e:
            logging.critical("caught exception %s", e)
            host_vm.shutdown(force=True)
            raise
        except KeyboardInterrupt:
            logging.warning("keyboard interrupt")
            host_vm.shutdown(force=True)
            raise

        host_vm.eject_cd()
    finally:
        logging.info("Removing %s from ISO-SR server", os.path.basename(remote_iso))
        ssh(ISOSR_SRV, ["rm", remote_iso])

def monitor_restore(*, ip):
# wait for "yum install" phase to start
wait_for(lambda: ssh(ip, ["grep",
@@ -0,0 +1 @@
tests/install/test_pool.py::test_pool_rpu[uefi-821.1-83nightly]
1 change: 0 additions & 1 deletion tests/install/test-sequences/uefi-82-nosr+pool.lst
@@ -1,3 +1,2 @@
tests/install/test.py::TestNested::test_tune_firstboot[None-uefi-821.1-host2-iso-nosr]
tests/install/test.py::TestNested::test_boot_inst[uefi-821.1-host2-iso-nosr]
tests/install/test_pool.py::test_join_pool[uefi-821.1]
122 changes: 109 additions & 13 deletions tests/install/test_pool.py
@@ -2,20 +2,21 @@
import os
import pytest

from lib import pxe
from lib.common import wait_for
from lib import commands, installer, pxe
from lib.common import wait_for, vm_image
from lib.installer import AnswerFile
from lib.pool import Pool

from data import HOSTS_IP_CONFIG
from data import HOSTS_IP_CONFIG, NFS_DEVICE_CONFIG

MAINTESTS = "tests/install/test.py::TestNested"

# FIXME without --ignore-unknown-dependency, SKIPPED
# "because it depends on tests/install/test.py::TestNested::test_firstboot_install[uefi-821.1-host1-iso-nosr]"
@pytest.mark.usefixtures("xcpng_chained")
@pytest.mark.parametrize("mode", (
"821.1",
))
@pytest.mark.parametrize(("orig_version", "iso_version"), [
("821.1", "83nightly"),
])
@pytest.mark.parametrize("firmware", ("uefi", "bios"))
@pytest.mark.continuation_of(
lambda params, firmware: [
@@ -27,35 +28,130 @@
image_test=f"{MAINTESTS}::test_firstboot_install[{firmware}-{params}-host2-iso-nosr]",
scope="session"),
],
param_mapping={"params": "mode", "firmware": "firmware"})
def test_join_pool(firmware, mode, create_vms):
param_mapping={"params": "orig_version", "firmware": "firmware"})
@pytest.mark.installer_iso(
lambda iso_version: iso_version,
param_mapping={"iso_version": "iso_version"})
@pytest.mark.answerfile(
lambda firmware: AnswerFile("UPGRADE").top_append(
{"TAG": "source", "type": "local"},
{"TAG": "existing-installation", "CONTENTS": {"uefi": "nvme0n1", "bios": "sda"}[firmware]},
),
param_mapping={"firmware": "firmware"})
def test_pool_rpu(remastered_iso, create_vms,
firmware, orig_version, iso_version):
    (master_vm, slave_vm) = create_vms
    master_mac = master_vm.vifs()[0].param_get('MAC')
    logging.info("Master VM has MAC %s", master_mac)
    slave_mac = slave_vm.vifs()[0].param_get('MAC')
    logging.info("Slave VM has MAC %s", slave_mac)

    pxe.arp_clear_for(master_mac)
    master_vm.start()
    pxe.arp_clear_for(slave_mac)
    slave_vm.start()
    wait_for(master_vm.is_running, "Wait for master VM running")
    wait_for(slave_vm.is_running, "Wait for slave VM running")

    master_vm.ip = HOSTS_IP_CONFIG['HOSTS']['DEFAULT']
    logging.info("Expecting master VM to have IP %s", master_vm.ip)

    slave_vm.ip = HOSTS_IP_CONFIG['HOSTS']['host2']
    logging.info("Expecting slave VM to have IP %s", slave_vm.ip)

    wait_for(lambda: not os.system(f"nc -zw5 {master_vm.ip} 22"),
             "Wait for ssh up on Master VM", retry_delay_secs=5)
    wait_for(lambda: not os.system(f"nc -zw5 {slave_vm.ip} 22"),
             "Wait for ssh up on Slave VM", retry_delay_secs=5)

    pool = Pool(master_vm.ip)

    # create pool with shared SR

    slave = Pool(slave_vm.ip).master
    slave.join_pool(pool)

    slave.shutdown()
    sr = pool.master.sr_create("nfs", "NFS Shared SR", NFS_DEVICE_CONFIG,
                               shared=True, verify=True)

    # create and start VMs
    vms = (
        pool.master.import_vm(vm_image('mini-linux-x86_64-bios'), sr_uuid=sr.uuid),
        pool.master.import_vm(vm_image('mini-linux-x86_64-bios'), sr_uuid=sr.uuid),
    )

    for vm in vms:
        vm.start()

    wait_for(lambda: all(vm.is_running() for vm in vms), "Wait for VMs running")
    wait_for(lambda: all(vm.try_get_and_store_ip() for vm in vms),
             "Wait for VM IPs", timeout_secs=5 * 60)
    wait_for(lambda: all(vm.is_management_agent_up() for vm in vms),
             "Wait for management agents up")

    logging.info("VMs dispatched as %s", [vm.get_residence_host().uuid for vm in vms])

    # do RPU

    # evacuate master
    vms_to_migrate = [vm for vm in vms if vm.get_residence_host().uuid == pool.master.uuid]
    logging.info("Expecting migration of %s", ([vm.uuid for vm in vms_to_migrate],))
    pool.master.xe("host-evacuate", {"host": pool.master.uuid})
    wait_for(lambda: all(vm.get_residence_host().uuid != pool.master.uuid for vm in vms_to_migrate),
             "Wait for VM migration")

    # upgrade master
    pool.master.shutdown()
    wait_for(lambda: master_vm.is_halted(), "Wait for Master VM to be halted", timeout_secs=5 * 60)
    installer.perform_upgrade(iso=remastered_iso, host_vm=master_vm)
    pxe.arp_clear_for(master_mac)
    master_vm.start()
    wait_for(master_vm.is_running, "Wait for Master VM running")

    wait_for(lambda: slave_vm.is_halted(), "Wait for Slave VM to be halted")
    wait_for(lambda: master_vm.is_halted(), "Wait for Master VM to be halted")
    wait_for(lambda: pxe.arp_addresses_for(master_mac),
             "Wait for DHCP server to see Master VM in ARP tables",
             timeout_secs=10 * 60)
    ips = pxe.arp_addresses_for(master_mac)
    logging.info("Master VM has IPs %s", ips)
    assert len(ips) == 1
    master_vm.ip = ips[0]

    wait_for(lambda: not os.system(f"nc -zw5 {master_vm.ip} 22"),
             "Wait for ssh back up on Master VM", retry_delay_secs=5)
    wait_for(pool.master.is_enabled, "Wait for XAPI to be ready", timeout_secs=30 * 60)

    # evacuate slave
    vms_to_migrate = vms
    logging.info("Expecting migration of %s", ([vm.uuid for vm in vms_to_migrate],))
    pool.master.xe("host-evacuate", {"host": slave.uuid})
    wait_for(lambda: all(vm.get_residence_host().uuid != slave.uuid for vm in vms),
             "Wait for VM migration")

    # upgrade slave
    slave.shutdown()
    wait_for(lambda: slave_vm.is_halted(), "Wait for Slave VM to be halted", timeout_secs=5 * 60)
    installer.perform_upgrade(iso=remastered_iso, host_vm=slave_vm)
    pxe.arp_clear_for(slave_mac)
    slave_vm.start()
    wait_for(slave_vm.is_running, "Wait for Slave VM running")

    wait_for(lambda: pxe.arp_addresses_for(slave_mac),
             "Wait for DHCP server to see Slave VM in ARP tables",
             timeout_secs=10 * 60)
    ips = pxe.arp_addresses_for(slave_mac)
    logging.info("Slave VM has IPs %s", ips)
    assert len(ips) == 1
    slave_vm.ip = ips[0]

    wait_for(lambda: not os.system(f"nc -zw5 {slave_vm.ip} 22"),
             "Wait for ssh back up on Slave VM", retry_delay_secs=5)
    wait_for(slave.is_enabled, "Wait for XAPI to be ready", timeout_secs=30 * 60)

    logging.info("Migrating a VM back to slave")
    vms[1].migrate(slave)

    # cleanup

    slave.shutdown()
    pool.master.shutdown()
    wait_for(lambda: slave_vm.is_halted(), "Wait for Slave VM to be halted", timeout_secs=5 * 60)
    wait_for(lambda: master_vm.is_halted(), "Wait for Master VM to be halted", timeout_secs=5 * 60)
    # FIXME destroy shared SR contents

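As a possible follow-up to the "FIXME destroy shared SR contents" note at the end of test_pool_rpu (a sketch only, not part of this commit, and assuming the VM and SR wrapper objects expose shutdown()/destroy() helpers the way other tests in the suite use them), the cleanup could remove the imported VMs and the shared NFS SR before the final host shutdowns:

    # hypothetical cleanup, to run before slave.shutdown() / pool.master.shutdown():
    for vm in vms:
        vm.shutdown(verify=True)   # assumed helper signature
        vm.destroy()               # assumed helper
    sr.destroy(verify=True)        # assumed helper; removes the shared NFS SR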