From 518148ca3761a24f7ca56d4fb7e59752bf5f235e Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Tue, 1 Oct 2024 07:53:08 -0600 Subject: [PATCH 01/10] api: Add `vm_limits_cpu_pin_strategy` API extension. Signed-off-by: Kadin Sayani --- doc/api-extensions.md | 4 ++++ shared/version/api.go | 1 + 2 files changed, 5 insertions(+) diff --git a/doc/api-extensions.md b/doc/api-extensions.md index 7692d284bf1f..9ec7dcfe6a63 100644 --- a/doc/api-extensions.md +++ b/doc/api-extensions.md @@ -2516,3 +2516,7 @@ Adds support for using a bridge network with a specified VLAN ID as an OVN uplin Adds `logical_cpus` field to `GET /1.0/cluster/members/{name}/state` which contains the total available logical CPUs available when LXD started. + +## `vm_limits_cpu_pin_strategy` + +Adds a new {config:option}`instance-resource-limits:limits.cpu.pin_strategy` configuration option for virtual machines. This option controls the CPU pinning strategy. When set to `none`, CPU auto pinning is disabled. When set to `auto`, CPU auto pinning is enabled. diff --git a/shared/version/api.go b/shared/version/api.go index 9bc460e5bbca..84a109007f67 100644 --- a/shared/version/api.go +++ b/shared/version/api.go @@ -422,6 +422,7 @@ var APIExtensions = []string{ "network_allocations_ovn_uplink", "network_ovn_uplink_vlan", "state_logical_cpus", + "vm_limits_cpu_pin_strategy", } // APIExtensionsCount returns the number of available API extensions. 
From 80da1ef4d7b533385bd53303dedfb6edad5f236a Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Thu, 26 Sep 2024 13:11:18 -0600 Subject: [PATCH 02/10] lxd/instance: Add `limits.cpu.pin_strategy` config key for virtual machines Signed-off-by: Kadin Sayani --- lxd/instance/instancetype/instance.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lxd/instance/instancetype/instance.go b/lxd/instance/instancetype/instance.go index 9fa602e35cbf..983cb264a263 100644 --- a/lxd/instance/instancetype/instance.go +++ b/lxd/instance/instancetype/instance.go @@ -970,6 +970,19 @@ var InstanceConfigKeysVM = map[string]func(value string) error{ // shortdesc: Whether to back the instance using huge pages "limits.memory.hugepages": validate.Optional(validate.IsBool), + // lxdmeta:generate(entities=instance; group=resource-limits; key=limits.cpu.pin_strategy) + // Specify the strategy for VM CPU auto pinning. + // Possible values: `none` (disables CPU auto pinning) and `auto` (enables CPU auto pinning). + // + // See {ref}`instance-options-limits-cpu-vm` for more information. + // --- + // type: string + // defaultdesc: `none` + // liveupdate: no + // condition: virtual machine + // shortdesc: VM CPU auto pinning strategy + "limits.cpu.pin_strategy": validate.Optional(validate.IsOneOf("none", "auto")), + // lxdmeta:generate(entities=instance; group=migration; key=migration.stateful) // Enabling this option prevents the use of some features that are incompatible with it. 
// --- From 487d16a9454e25c78e6802225b09d3e6a6fa27f8 Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Thu, 26 Sep 2024 13:12:16 -0600 Subject: [PATCH 03/10] metadata: Run make update-metadata Signed-off-by: Kadin Sayani --- doc/metadata.txt | 12 ++++++++++++ lxd/metadata/configuration.json | 10 ++++++++++ 2 files changed, 22 insertions(+) diff --git a/doc/metadata.txt b/doc/metadata.txt index b5e833818995..cc71a52e0646 100644 --- a/doc/metadata.txt +++ b/doc/metadata.txt @@ -1978,6 +1978,18 @@ A comma-separated list of NUMA node IDs or ranges to place the instance CPUs on. See {ref}`instance-options-limits-cpu-container` for more information. ``` +```{config:option} limits.cpu.pin_strategy instance-resource-limits +:condition: "virtual machine" +:defaultdesc: "`none`" +:liveupdate: "no" +:shortdesc: "VM CPU auto pinning strategy" +:type: "string" +Specify the strategy for VM CPU auto pinning. +Possible values: `none` (disables CPU auto pinning) and `auto` (enables CPU auto pinning). + +See {ref}`instance-options-limits-cpu-vm` for more information. 
+``` + ```{config:option} limits.cpu.priority instance-resource-limits :condition: "container" :defaultdesc: "`10` (maximum)" diff --git a/lxd/metadata/configuration.json b/lxd/metadata/configuration.json index ff23764bf2f4..cab699884da7 100644 --- a/lxd/metadata/configuration.json +++ b/lxd/metadata/configuration.json @@ -2251,6 +2251,16 @@ "type": "string" } }, + { + "limits.cpu.pin_strategy": { + "condition": "virtual machine", + "defaultdesc": "`none`", + "liveupdate": "no", + "longdesc": "Specify the strategy for VM CPU auto pinning.\nPossible values: `none` (disables CPU auto pinning) and `auto` (enables CPU auto pinning).\n\nSee {ref}`instance-options-limits-cpu-vm` for more information.", + "shortdesc": "VM CPU auto pinning strategy", + "type": "string" + } + }, { "limits.cpu.priority": { "condition": "container", From 846cec4ec879f96aacace14fb354e7fec25d5d13 Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Thu, 26 Sep 2024 17:49:12 -0600 Subject: [PATCH 04/10] lxd/instance: Add input validation for `limits.cpu.pin_strategy` Signed-off-by: Kadin Sayani --- lxd/instance/instance_utils.go | 9 +++++++++ shared/validate/validate.go | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/lxd/instance/instance_utils.go b/lxd/instance/instance_utils.go index 03d6a6da1241..515d09f8b079 100644 --- a/lxd/instance/instance_utils.go +++ b/lxd/instance/instance_utils.go @@ -36,6 +36,7 @@ import ( "github.com/canonical/lxd/shared/logger" "github.com/canonical/lxd/shared/osarch" "github.com/canonical/lxd/shared/revert" + "github.com/canonical/lxd/shared/validate" "github.com/canonical/lxd/shared/version" ) @@ -111,6 +112,14 @@ func ValidConfig(sysOS *sys.OS, config map[string]string, expanded bool, instanc return fmt.Errorf("nvidia.runtime is incompatible with privileged containers") } + // Validate pinning strategy when limits.cpu specifies static pinning. 
+ cpuPinStrategy := config["limits.cpu.pin_strategy"] + cpuLimit := config["limits.cpu"] + err = validate.IsStaticCPUPinning(cpuLimit) + if err == nil && !expanded && cpuPinStrategy == "auto" { + return fmt.Errorf(`CPU pinning specified, but pinning strategy is set to "auto"`) + } + return nil } diff --git a/shared/validate/validate.go b/shared/validate/validate.go index 338be680e3ff..c747c54281ad 100644 --- a/shared/validate/validate.go +++ b/shared/validate/validate.go @@ -884,3 +884,12 @@ func IsMultipleOfUnit(unit string) func(value string) error { return nil } } + +// IsStaticCPUPinning validates a static CPU pinning strategy. +func IsStaticCPUPinning(value string) error { + if strings.ContainsAny(value, ",-") { + return nil + } + + return fmt.Errorf("Invalid static CPU pinning strategy: %s", value) +} From 411f0c397852e1e484b58c85ff1118d088a2e036 Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Thu, 26 Sep 2024 14:42:13 -0600 Subject: [PATCH 05/10] lxd/devices: Disable VM CPU auto pinning by default Unless `limits.cpu.pin_strategy` is set to auto, VM CPU auto pinning is disabled Signed-off-by: Kadin Sayani --- lxd/devices.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lxd/devices.go b/lxd/devices.go index ab1d6bae7c1f..8edbfeea2e92 100644 --- a/lxd/devices.go +++ b/lxd/devices.go @@ -21,6 +21,7 @@ import ( "github.com/canonical/lxd/lxd/state" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/logger" + "github.com/canonical/lxd/shared/validate" ) type deviceTaskCPU struct { @@ -484,6 +485,14 @@ func deviceTaskBalance(s *state.State) { } } + // Determine CPU pinning strategy and static pinning settings. + // When pinning strategy does not equal auto (none or empty), don't auto pin CPUs. + cpuPinStrategy := conf["limits.cpu.pin_strategy"] + err = validate.IsStaticCPUPinning(cpulimit) + if err != nil && c.Type() == instancetype.VM && cpuPinStrategy != "auto" { + continue + } + // Check that the instance is running. 
// We use InitPID here rather than IsRunning because this task can be triggered during the container's // onStart hook, which is during the time that the start lock is held, which causes IsRunning to From 963006dfdd0d5b6258cfc047d9b680f984cf7395 Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Fri, 27 Sep 2024 12:01:08 -0600 Subject: [PATCH 06/10] doc: Update CPU pinning documentation for VMs Signed-off-by: Kadin Sayani --- doc/reference/instance_options.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/reference/instance_options.md b/doc/reference/instance_options.md index 652691b56a08..5055a6ce0efb 100644 --- a/doc/reference/instance_options.md +++ b/doc/reference/instance_options.md @@ -98,6 +98,9 @@ You have different options to limit CPU usage: - Set {config:option}`instance-resource-limits:limits.cpu.allowance` to restrict the load an instance can put on the available CPUs. This option is available only for containers. See {ref}`instance-options-limits-cpu-container` for how to set this option. +- Set {config:option}`instance-resource-limits:limits.cpu.pin_strategy` to specify the strategy for virtual-machine CPU auto pinning. + This option is available only for virtual machines. + See {ref}`instance-options-limits-cpu-vm` for how to set this option. It is possible to set both options at the same time to restrict both which CPUs are visible to the instance and the allowed usage of those instances. However, if you use {config:option}`instance-resource-limits:limits.cpu.allowance` with a time limit, you should avoid using {config:option}`instance-resource-limits:limits.cpu` in addition, because that puts a lot of constraints on the scheduler and might lead to less efficient allocations. 
@@ -116,6 +119,7 @@ You can specify either which CPUs or how many CPUs are visible and available to - If you specify a number (for example, `4`) of CPUs, LXD will do dynamic load-balancing of all instances that aren't pinned to specific CPUs, trying to spread the load on the machine. Instances are re-balanced every time an instance starts or stops, as well as whenever a CPU is added to the system. +(instance-options-limits-cpu-vm)= ##### CPU limits for virtual machines ```{note} @@ -127,10 +131,10 @@ Depending on the guest operating system, you might need to either restart the in LXD virtual machines default to having just one vCPU allocated, which shows up as matching the host CPU vendor and type, but has a single core and no threads. When {config:option}`instance-resource-limits:limits.cpu` is set to a single integer, LXD allocates multiple vCPUs and exposes them to the guest as full cores. -Those vCPUs are not pinned to specific physical cores on the host. +Unless {config:option}`instance-resource-limits:limits.cpu.pin_strategy` is set to `auto`, those vCPUs are not pinned to specific cores on the host. The number of vCPUs can be updated while the VM is running. -When {config:option}`instance-resource-limits:limits.cpu` is set to a range or comma-separated list of CPU IDs (as provided by [`lxc info --resources`](lxc_info.md)), the vCPUs are pinned to those physical cores. +When {config:option}`instance-resource-limits:limits.cpu` is set to a range or comma-separated list of CPU IDs (as provided by [`lxc info --resources`](lxc_info.md)), the vCPUs are pinned to those cores. In this scenario, LXD checks whether the CPU configuration lines up with a realistic hardware topology and if it does, it replicates that topology in the guest. When doing CPU pinning, it is not possible to change the configuration while the VM is running. 
From 7eee0040dfb5abb5f0bc469449d0eae528203324 Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Fri, 4 Oct 2024 11:29:04 -0600 Subject: [PATCH 07/10] lxd/cgroup: Add named results to `ParseCPU` and remove unnecessary type casting to `int64` Signed-off-by: Kadin Sayani --- lxd/cgroup/cgroup_cpu.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lxd/cgroup/cgroup_cpu.go b/lxd/cgroup/cgroup_cpu.go index 593eb2f21581..803604de2374 100644 --- a/lxd/cgroup/cgroup_cpu.go +++ b/lxd/cgroup/cgroup_cpu.go @@ -20,9 +20,7 @@ func TaskSchedulerTrigger(srcType string, srcName string, srcStatus string) { } // ParseCPU parses CPU allowances. -func ParseCPU(cpuAllowance string, cpuPriority string) (int64, int64, int64, error) { - var err error - +func ParseCPU(cpuAllowance string, cpuPriority string) (cpuShares int64, cpuCfsQuota int64, cpuCfsPeriod int64, err error) { // Max shares depending on backend. maxShares := int64(1024) if cgControllers["cpu"] == V2 { @@ -30,7 +28,7 @@ func ParseCPU(cpuAllowance string, cpuPriority string) (int64, int64, int64, err } // Parse priority - cpuShares := int64(0) + cpuShares = 0 cpuPriorityInt := 10 if cpuPriority != "" { cpuPriorityInt, err = strconv.Atoi(cpuPriority) @@ -41,8 +39,8 @@ func ParseCPU(cpuAllowance string, cpuPriority string) (int64, int64, int64, err cpuShares -= int64(10 - cpuPriorityInt) // Parse allowance - cpuCfsQuota := int64(-1) - cpuCfsPeriod := int64(100000) + cpuCfsQuota = -1 + cpuCfsPeriod = 100000 if cgControllers["cpu"] == V2 { cpuCfsPeriod = -1 } From 6027e2a122bd409d7759ee2559baa6860f8220af Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Wed, 9 Oct 2024 08:37:43 -0600 Subject: [PATCH 08/10] lxd/cgroup: Refactor `srcType` parameter in `TaskSchedulerTrigger` to use `instancetype.Type` instead of `string` Signed-off-by: Kadin Sayani --- lxd/cgroup/cgroup_cpu.go | 6 ++++-- lxd/instance/drivers/driver_lxc.go | 6 +++--- lxd/instance/drivers/driver_qemu.go | 6 +++--- 3 files changed, 10
insertions(+), 8 deletions(-) diff --git a/lxd/cgroup/cgroup_cpu.go b/lxd/cgroup/cgroup_cpu.go index 803604de2374..e81ed919a8ca 100644 --- a/lxd/cgroup/cgroup_cpu.go +++ b/lxd/cgroup/cgroup_cpu.go @@ -4,16 +4,18 @@ import ( "fmt" "strconv" "strings" + + "github.com/canonical/lxd/lxd/instance/instancetype" ) // DeviceSchedRebalance channel for scheduling a CPU rebalance. var DeviceSchedRebalance = make(chan []string, 2) // TaskSchedulerTrigger triggers a CPU rebalance. -func TaskSchedulerTrigger(srcType string, srcName string, srcStatus string) { +func TaskSchedulerTrigger(srcType instancetype.Type, srcName string, srcStatus string) { // Spawn a go routine which then triggers the scheduler select { - case DeviceSchedRebalance <- []string{srcType, srcName, srcStatus}: + case DeviceSchedRebalance <- []string{srcType.String(), srcName, srcStatus}: default: // Channel is full, drop the event } diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index cd86fe3dae9c..801c1d9fed2b 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -2576,7 +2576,7 @@ func (d *lxc) onStart(_ map[string]string) error { } // Trigger a rebalance - cgroup.TaskSchedulerTrigger("container", d.name, "started") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "started") // Record last start state. 
err = d.recordLastState() @@ -3055,7 +3055,7 @@ func (d *lxc) onStop(args map[string]string) error { } // Trigger a rebalance - cgroup.TaskSchedulerTrigger("container", d.name, "stopped") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "stopped") // Destroy ephemeral containers if d.ephemeral { @@ -4872,7 +4872,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error { if cpuLimitWasChanged { // Trigger a scheduler re-run - cgroup.TaskSchedulerTrigger("container", d.name, "changed") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "changed") } if userRequested { diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index d1f43c98730c..0cf73aa8325d 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -1668,7 +1668,7 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error { } // Trigger a rebalance procedure which will set vCPU affinity (pinning) (explicit or implicit) - cgroup.TaskSchedulerTrigger("virtual-machine", d.name, "started") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "started") // Run monitor hooks from devices. 
for _, monHook := range monHooks { @@ -4936,7 +4936,7 @@ func (d *qemu) Stop(stateful bool) error { } // Trigger a rebalance - cgroup.TaskSchedulerTrigger("virtual-machine", d.name, "stopped") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "stopped") return nil } @@ -5806,7 +5806,7 @@ func (d *qemu) Update(args db.InstanceArgs, userRequested bool) error { if cpuLimitWasChanged { // Trigger a scheduler re-run - cgroup.TaskSchedulerTrigger("virtual-machine", d.name, "changed") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "changed") } if isRunning { From 62cfe929e17d4bf74f16b3026a87edc656e991c4 Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Wed, 16 Oct 2024 13:03:31 -0600 Subject: [PATCH 09/10] lxd/shared/validate: Add named results to `ParseNetworkVLANRange` and `ParseUint32Range` Signed-off-by: Kadin Sayani --- shared/validate/validate.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/shared/validate/validate.go b/shared/validate/validate.go index c747c54281ad..9adf78f97735 100644 --- a/shared/validate/validate.go +++ b/shared/validate/validate.go @@ -89,7 +89,7 @@ func IsUint32(value string) error { // ParseUint32Range parses a uint32 range in the form "number" or "start-end". // Returns the start number and the size of the range. 
-func ParseUint32Range(value string) (uint32, uint32, error) { +func ParseUint32Range(value string) (start uint32, rangeSize uint32, err error) { rangeParts := strings.SplitN(value, "-", 2) rangeLen := len(rangeParts) if rangeLen != 1 && rangeLen != 2 { @@ -101,7 +101,7 @@ func ParseUint32Range(value string) (uint32, uint32, error) { return 0, 0, fmt.Errorf("Invalid number %q", value) } - var rangeSize uint32 = 1 + rangeSize = 1 if rangeLen == 2 { endNum, err := strconv.ParseUint(rangeParts[1], 10, 32) @@ -116,7 +116,9 @@ func ParseUint32Range(value string) (uint32, uint32, error) { rangeSize += uint32(endNum) - uint32(startNum) } - return uint32(startNum), rangeSize, nil + start = uint32(startNum) + + return start, rangeSize, nil } // IsUint32Range validates whether the string is a uint32 range in the form "number" or "start-end". @@ -674,8 +676,8 @@ func IsAbsFilePath(value string) error { // ParseNetworkVLANRange parses a VLAN range in the form "number" or "start-end". // Returns the start number and the number of items in the range. -func ParseNetworkVLANRange(vlan string) (int, int, error) { - err := IsNetworkVLAN(vlan) +func ParseNetworkVLANRange(vlan string) (vlanRangeStart int, rangeSize int, err error) { + err = IsNetworkVLAN(vlan) if err == nil { vlanRangeStart, err := strconv.Atoi(vlan) if err != nil { @@ -694,7 +696,7 @@ func ParseNetworkVLANRange(vlan string) (int, int, error) { return -1, -1, fmt.Errorf("Invalid VLAN range boundary. start:%s, end:%s", vlanRange[0], vlanRange[1]) } - vlanRangeStart, err := strconv.Atoi(vlanRange[0]) + vlanRangeStart, err = strconv.Atoi(vlanRange[0]) if err != nil { return -1, -1, err } @@ -708,7 +710,9 @@ func ParseNetworkVLANRange(vlan string) (int, int, error) { return -1, -1, fmt.Errorf("Invalid VLAN range boundary. 
start:%d is higher than end:%d", vlanRangeStart, vlanRangeEnd) } - return vlanRangeStart, vlanRangeEnd - vlanRangeStart + 1, nil + rangeSize = vlanRangeEnd - vlanRangeStart + 1 + + return vlanRangeStart, rangeSize, nil } // IsHostname checks the string is valid DNS hostname. From f6dc8903ea36c0348eb7d4e1aed5be110d5331a6 Mon Sep 17 00:00:00 2001 From: Kadin Sayani Date: Fri, 18 Oct 2024 08:50:20 -0600 Subject: [PATCH 10/10] tests: Add tests for `limits.cpu.pin_strategy` and `limits.cpu` config and profile settings Signed-off-by: Kadin Sayani --- test/suites/config.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/suites/config.sh b/test/suites/config.sh index 1c98339bdcc4..d72e29dcc080 100644 --- a/test/suites/config.sh +++ b/test/suites/config.sh @@ -181,6 +181,12 @@ test_config_profiles() { lxc profile device list onenic | grep eth0 lxc profile device show onenic | grep p2p + # test setting limits.cpu.pin_strategy at the local config and profile level + ! lxc config set c1 limits.cpu.pin_strategy=auto || false + lxc profile set default limits.cpu.pin_strategy=auto + ! lxc profile set default limits.cpu=1-2 || false # test adding a cpu limit with limits.cpu.pin_strategy set (should fail) + lxc profile unset default limits.cpu.pin_strategy + # test live-adding a nic veth_host_name="veth$$" lxc start foo