tests : add check on the max nb of TDs that can be run in parralel #340

Merged
merged 2 commits on Mar 21, 2025
32 changes: 22 additions & 10 deletions tests/lib/Qemu.py
@@ -629,33 +629,45 @@ def run_and_wait(self):
self.run()
QemuMonitor(self)

def communicate(self):
def communicate(self, timeout=60):
"""
Wait for qemu to exit
"""
self.out, self.err = self.proc.communicate(timeout=60)
self.out, self.err = self.proc.communicate(timeout=timeout)
if self.proc.returncode != 0:
print(self.err.decode())
return self.out, self.err

def stop(self):
def shutdown(self):
"""
Stop qemu process
Send shutdown command to the VM
Do not wait for the VM to exit
Return false if the VM is already terminated
"""
if self.proc is None:
return
return False
if self.proc.returncode is not None:
return
return False

try:
mon = QemuMonitor(self)
mon.powerdown()
except Exception as e:
pass

# self.proc.returncode == None -> not yet terminated
return True

def stop(self):
"""
Stop qemu process
"""
if not self.shutdown():
return

try:
# try to shutdown the VM properly, this is important to avoid
# rootfs corruption if we want to run the guest again
# catch exception and ignore it since we are stopping .... no need to fail the test
mon = QemuMonitor(self)
mon.powerdown()

self.communicate()
return
except Exception as e:
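QemuMonitor.powerdown() is not part of this diff. For context, a graceful ACPI powerdown over QEMU's QMP socket typically looks like the minimal sketch below; the socket path and function name are illustrative assumptions, not the repository's actual QemuMonitor API, and qemu must have been started with a QMP server (e.g. -qmp unix:/tmp/qmp.sock,server,nowait).

import json
import socket

def qmp_powerdown(sock_path='/tmp/qmp.sock'):  # assumed QMP socket path
    """Ask the guest to power down gracefully (ACPI powerdown) via QMP."""
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
        s.connect(sock_path)
        f = s.makefile('rw')
        f.readline()                                         # QMP greeting banner
        f.write(json.dumps({'execute': 'qmp_capabilities'}) + '\n')
        f.flush()
        f.readline()                                         # capabilities reply
        f.write(json.dumps({'execute': 'system_powerdown'}) + '\n')
        f.flush()
        f.readline()                                         # powerdown reply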
10 changes: 10 additions & 0 deletions tests/lib/util.py
@@ -45,6 +45,16 @@ def tcp_port_available():
return port

def get_max_td_vms():
"""
MKTME encryption engine is used both for legacy MKTME operation and TDX operation
The key space is partitioned in 3 ranges:
- first key
- shared keys
- TDX keys
So if we have 128 keys and we decide to split this range into 2 equal sets (in the BIOS),
the TDX key space will only have 63 keys instead of 64.
The number of TDX private keys can be read from the IA32_MKTME_KEYID_PARTITIONING MSR (0x87)
"""
cmd = ['rdmsr', '0x87']
rc = subprocess.run(cmd, capture_output=True)
assert rc.returncode == 0, "Failed getting max td vms"
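The hunk above is truncated. For reference, here is a minimal sketch of how the MSR value can be turned into a TD count, assuming the documented IA32_MKTME_KEYID_PARTITIONING layout (bits 31:0 = number of MKTME KeyIDs, bits 63:32 = number of TDX private KeyIDs); the parsing shown is illustrative and may differ from the repository's actual implementation.

import subprocess

def get_max_td_vms():
    """Return the number of TDX private KeyIDs, i.e. the max number of TDs."""
    # rdmsr prints the 64-bit MSR value in hex; needs root and the msr kernel module
    rc = subprocess.run(['rdmsr', '0x87'], capture_output=True)
    assert rc.returncode == 0, "Failed getting max td vms"
    msr = int(rc.stdout.strip(), 16)
    # bits 31:0  -> number of legacy MKTME (shared) KeyIDs
    # bits 63:32 -> number of TDX private KeyIDs
    return msr >> 32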
48 changes: 47 additions & 1 deletion tests/tests/stress/test_stress_resources.py
@@ -17,6 +17,7 @@
import subprocess
import time
import multiprocessing
import pytest

import Qemu
import util
@@ -70,15 +71,47 @@ def test_stress_max_vcpus(qm):

qm.stop()

def check_qemu_fail_to_start(qm, error_msg=None):
try:
_, err = qm.communicate(timeout=5)
except:
# if timeout, that means the QEMU is running fine
# try to connect with ssh to make sure the TD is running fine
try:
ssh = Qemu.QemuSSH(qm)
except:
# the qemu is running but we cannot connect to SSH
# we consider that the check is OK
qm.stop()
return
pytest.fail('The TD is running !')
if error_msg:
assert error_msg in err.decode()

def test_stress_max_guests():
"""
Test max guests (No Intel Case ID)

There is a limit on the number of TDs that can be run in parallel.
This limit can be due to several factors, but the most prevalent factor
is the number of keys the CPU can allocate to TDs.
In fact, TDX takes advantage of an existing CPU feature called MK-TME
(Multi-Key Total Memory Encryption) to encrypt the VM memory. It enables
the CPU to encrypt each TD's memory with a unique Advanced Encryption Standard (AES) key.
MK-TME offers a number of keys and this key space is partitioned into 2 sets:
Shared (VMM) and Private (TDX). The number of keys in the Private space defines the
maximum number of TDs we can run in parallel.

This test verifies that we can run TDs up to this limit and that any new TD creation
is refused by qemu gracefully.
"""

# get max number of TD VMs we can create (max - current)
max_td_vms = util.get_max_td_vms() - util.get_current_td_vms()
assert max_td_vms > 0, "No available space for TD VMs"

print(f'The limit number of TDs is : {max_td_vms}')

qm = [None] * max_td_vms

# initialize machines
@@ -95,8 +128,21 @@ def test_stress_max_guests():
print("Waiting for machine %d" % (i))
ssh = Qemu.QemuSSH(qm[i])

# try to run a new TD
# expect qemu quit immediately with a specific error message
with Qemu.QemuMachine() as one_more:
one_more.run()
check_qemu_fail_to_start(one_more, error_msg="KVM_TDX_INIT_VM failed: No space left on device")

# stop all machines
for i in range(max_td_vms):
print("Stopping machine %d" % (i))
qm[i].stop()
qm[i].shutdown()

# wait for all machines to exit
for i in range(max_td_vms):
print("Stopping machine %d" % (i))
try:
qm[i].communicate()
except:
pass
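util.get_current_td_vms() is used above but is not part of this diff. One plausible way to count TDs that are already running is to count qemu processes launched with the TDX guest object; this sketch is purely illustrative and the helper's real implementation may differ.

import subprocess

def get_current_td_vms():
    """Rough count of already-running TD guests (illustrative only)."""
    # pgrep -f matches the full command line, -c prints the match count;
    # it exits with 1 when nothing matches, which we treat as zero.
    rc = subprocess.run(['pgrep', '-f', '-c', 'qemu.*tdx-guest'],
                        capture_output=True)
    return int(rc.stdout.strip()) if rc.returncode == 0 else 0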