diff --git a/doc/api-extensions.md b/doc/api-extensions.md index 7692d284bf1f..9ec7dcfe6a63 100644 --- a/doc/api-extensions.md +++ b/doc/api-extensions.md @@ -2516,3 +2516,7 @@ Adds support for using a bridge network with a specified VLAN ID as an OVN uplin Adds `logical_cpus` field to `GET /1.0/cluster/members/{name}/state` which contains the total available logical CPUs available when LXD started. + +## `vm_limits_cpu_pin_strategy` + +Adds a new {config:option}`instance-resource-limits:limits.cpu.pin_strategy` configuration option for virtual machines. This option controls the CPU pinning strategy. When set to `none`, CPU auto pinning is disabled. When set to `auto`, CPU auto pinning is enabled. diff --git a/doc/metadata.txt b/doc/metadata.txt index b5e833818995..cc71a52e0646 100644 --- a/doc/metadata.txt +++ b/doc/metadata.txt @@ -1978,6 +1978,18 @@ A comma-separated list of NUMA node IDs or ranges to place the instance CPUs on. See {ref}`instance-options-limits-cpu-container` for more information. ``` +```{config:option} limits.cpu.pin_strategy instance-resource-limits +:condition: "virtual machine" +:defaultdesc: "`none`" +:liveupdate: "no" +:shortdesc: "VM CPU auto pinning strategy" +:type: "string" +Specify the strategy for VM CPU auto pinning. +Possible values: `none` (disables CPU auto pinning) and `auto` (enables CPU auto pinning). + +See {ref}`instance-options-limits-cpu-vm` for more information. +``` + ```{config:option} limits.cpu.priority instance-resource-limits :condition: "container" :defaultdesc: "`10` (maximum)" diff --git a/doc/reference/instance_options.md b/doc/reference/instance_options.md index 652691b56a08..5055a6ce0efb 100644 --- a/doc/reference/instance_options.md +++ b/doc/reference/instance_options.md @@ -98,6 +98,9 @@ You have different options to limit CPU usage: - Set {config:option}`instance-resource-limits:limits.cpu.allowance` to restrict the load an instance can put on the available CPUs. 
This option is available only for containers. See {ref}`instance-options-limits-cpu-container` for how to set this option. +- Set {config:option}`instance-resource-limits:limits.cpu.pin_strategy` to specify the strategy for virtual-machine CPU auto pinning. + This option is available only for virtual machines. + See {ref}`instance-options-limits-cpu-vm` for how to set this option. It is possible to set both options at the same time to restrict both which CPUs are visible to the instance and the allowed usage of those instances. However, if you use {config:option}`instance-resource-limits:limits.cpu.allowance` with a time limit, you should avoid using {config:option}`instance-resource-limits:limits.cpu` in addition, because that puts a lot of constraints on the scheduler and might lead to less efficient allocations. @@ -116,6 +119,7 @@ You can specify either which CPUs or how many CPUs are visible and available to - If you specify a number (for example, `4`) of CPUs, LXD will do dynamic load-balancing of all instances that aren't pinned to specific CPUs, trying to spread the load on the machine. Instances are re-balanced every time an instance starts or stops, as well as whenever a CPU is added to the system. +(instance-options-limits-cpu-vm)= ##### CPU limits for virtual machines ```{note} @@ -127,10 +131,10 @@ Depending on the guest operating system, you might need to either restart the in LXD virtual machines default to having just one vCPU allocated, which shows up as matching the host CPU vendor and type, but has a single core and no threads. When {config:option}`instance-resource-limits:limits.cpu` is set to a single integer, LXD allocates multiple vCPUs and exposes them to the guest as full cores. -Those vCPUs are not pinned to specific physical cores on the host. +Unless {config:option}`instance-resource-limits:limits.cpu.pin_strategy` is set to `auto`, those vCPUs are not pinned to specific cores on the host. 
The number of vCPUs can be updated while the VM is running. -When {config:option}`instance-resource-limits:limits.cpu` is set to a range or comma-separated list of CPU IDs (as provided by [`lxc info --resources`](lxc_info.md)), the vCPUs are pinned to those physical cores. +When {config:option}`instance-resource-limits:limits.cpu` is set to a range or comma-separated list of CPU IDs (as provided by [`lxc info --resources`](lxc_info.md)), the vCPUs are pinned to those cores. In this scenario, LXD checks whether the CPU configuration lines up with a realistic hardware topology and if it does, it replicates that topology in the guest. When doing CPU pinning, it is not possible to change the configuration while the VM is running. diff --git a/lxd/cgroup/cgroup_cpu.go b/lxd/cgroup/cgroup_cpu.go index 593eb2f21581..e81ed919a8ca 100644 --- a/lxd/cgroup/cgroup_cpu.go +++ b/lxd/cgroup/cgroup_cpu.go @@ -4,25 +4,25 @@ import ( "fmt" "strconv" "strings" + + "github.com/canonical/lxd/lxd/instance/instancetype" ) // DeviceSchedRebalance channel for scheduling a CPU rebalance. var DeviceSchedRebalance = make(chan []string, 2) // TaskSchedulerTrigger triggers a CPU rebalance. -func TaskSchedulerTrigger(srcType string, srcName string, srcStatus string) { +func TaskSchedulerTrigger(srcType instancetype.Type, srcName string, srcStatus string) { // Spawn a go routine which then triggers the scheduler select { - case DeviceSchedRebalance <- []string{srcType, srcName, srcStatus}: + case DeviceSchedRebalance <- []string{srcType.String(), srcName, srcStatus}: default: // Channel is full, drop the event } } // ParseCPU parses CPU allowances. -func ParseCPU(cpuAllowance string, cpuPriority string) (int64, int64, int64, error) { - var err error - +func ParseCPU(cpuAllowance string, cpuPriority string) (cpuShares int64, cpuCfsQuota int64, cpuCfsPeriod int64, err error) { // Max shares depending on backend. 
maxShares := int64(1024) if cgControllers["cpu"] == V2 { @@ -30,7 +30,7 @@ func ParseCPU(cpuAllowance string, cpuPriority string) (int64, int64, int64, err } // Parse priority - cpuShares := int64(0) + cpuShares = 0 cpuPriorityInt := 10 if cpuPriority != "" { cpuPriorityInt, err = strconv.Atoi(cpuPriority) @@ -41,8 +41,8 @@ func ParseCPU(cpuAllowance string, cpuPriority string) (int64, int64, int64, err cpuShares -= int64(10 - cpuPriorityInt) // Parse allowance - cpuCfsQuota := int64(-1) - cpuCfsPeriod := int64(100000) + cpuCfsQuota = -1 + cpuCfsPeriod = 100000 if cgControllers["cpu"] == V2 { cpuCfsPeriod = -1 } diff --git a/lxd/devices.go b/lxd/devices.go index ab1d6bae7c1f..8edbfeea2e92 100644 --- a/lxd/devices.go +++ b/lxd/devices.go @@ -21,6 +21,7 @@ import ( "github.com/canonical/lxd/lxd/state" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/logger" + "github.com/canonical/lxd/shared/validate" ) type deviceTaskCPU struct { @@ -484,6 +485,14 @@ func deviceTaskBalance(s *state.State) { } } + // Determine CPU pinning strategy and static pinning settings. + // When pinning strategy does not equal auto (none or empty), don't auto pin CPUs. + cpuPinStrategy := conf["limits.cpu.pin_strategy"] + err = validate.IsStaticCPUPinning(cpulimit) + if err != nil && c.Type() == instancetype.VM && cpuPinStrategy != "auto" { + continue + } + // Check that the instance is running. 
// We use InitPID here rather than IsRunning because this task can be triggered during the container's // onStart hook, which is during the time that the start lock is held, which causes IsRunning to diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index cd86fe3dae9c..801c1d9fed2b 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -2576,7 +2576,7 @@ func (d *lxc) onStart(_ map[string]string) error { } // Trigger a rebalance - cgroup.TaskSchedulerTrigger("container", d.name, "started") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "started") // Record last start state. err = d.recordLastState() @@ -3055,7 +3055,7 @@ func (d *lxc) onStop(args map[string]string) error { } // Trigger a rebalance - cgroup.TaskSchedulerTrigger("container", d.name, "stopped") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "stopped") // Destroy ephemeral containers if d.ephemeral { @@ -4872,7 +4872,7 @@ func (d *lxc) Update(args db.InstanceArgs, userRequested bool) error { if cpuLimitWasChanged { // Trigger a scheduler re-run - cgroup.TaskSchedulerTrigger("container", d.name, "changed") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "changed") } if userRequested { diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index d1f43c98730c..0cf73aa8325d 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -1668,7 +1668,7 @@ func (d *qemu) start(stateful bool, op *operationlock.InstanceOperation) error { } // Trigger a rebalance procedure which will set vCPU affinity (pinning) (explicit or implicit) - cgroup.TaskSchedulerTrigger("virtual-machine", d.name, "started") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "started") // Run monitor hooks from devices. 
for _, monHook := range monHooks { @@ -4936,7 +4936,7 @@ func (d *qemu) Stop(stateful bool) error { } // Trigger a rebalance - cgroup.TaskSchedulerTrigger("virtual-machine", d.name, "stopped") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "stopped") return nil } @@ -5806,7 +5806,7 @@ func (d *qemu) Update(args db.InstanceArgs, userRequested bool) error { if cpuLimitWasChanged { // Trigger a scheduler re-run - cgroup.TaskSchedulerTrigger("virtual-machine", d.name, "changed") + cgroup.TaskSchedulerTrigger(d.dbType, d.name, "changed") } if isRunning { diff --git a/lxd/instance/instance_utils.go b/lxd/instance/instance_utils.go index 03d6a6da1241..515d09f8b079 100644 --- a/lxd/instance/instance_utils.go +++ b/lxd/instance/instance_utils.go @@ -36,6 +36,7 @@ import ( "github.com/canonical/lxd/shared/logger" "github.com/canonical/lxd/shared/osarch" "github.com/canonical/lxd/shared/revert" + "github.com/canonical/lxd/shared/validate" "github.com/canonical/lxd/shared/version" ) @@ -111,6 +112,14 @@ func ValidConfig(sysOS *sys.OS, config map[string]string, expanded bool, instanc return fmt.Errorf("nvidia.runtime is incompatible with privileged containers") } + // Validate pinning strategy when limits.cpu specifies static pinning. 
+ cpuPinStrategy := config["limits.cpu.pin_strategy"] + cpuLimit := config["limits.cpu"] + err = validate.IsStaticCPUPinning(cpuLimit) + if err == nil && !expanded && cpuPinStrategy == "auto" { + return fmt.Errorf(`CPU pinning specified, but pinning strategy is set to "auto"`) + } + return nil } diff --git a/lxd/instance/instancetype/instance.go b/lxd/instance/instancetype/instance.go index 9fa602e35cbf..983cb264a263 100644 --- a/lxd/instance/instancetype/instance.go +++ b/lxd/instance/instancetype/instance.go @@ -970,6 +970,19 @@ var InstanceConfigKeysVM = map[string]func(value string) error{ // shortdesc: Whether to back the instance using huge pages "limits.memory.hugepages": validate.Optional(validate.IsBool), + // lxdmeta:generate(entities=instance; group=resource-limits; key=limits.cpu.pin_strategy) + // Specify the strategy for VM CPU auto pinning. + // Possible values: `none` (disables CPU auto pinning) and `auto` (enables CPU auto pinning). + // + // See {ref}`instance-options-limits-cpu-vm` for more information. + // --- + // type: string + // defaultdesc: `none` + // liveupdate: no + // condition: virtual machine + // shortdesc: VM CPU auto pinning strategy + "limits.cpu.pin_strategy": validate.Optional(validate.IsOneOf("none", "auto")), + // lxdmeta:generate(entities=instance; group=migration; key=migration.stateful) // Enabling this option prevents the use of some features that are incompatible with it. 
// --- diff --git a/lxd/metadata/configuration.json b/lxd/metadata/configuration.json index ff23764bf2f4..cab699884da7 100644 --- a/lxd/metadata/configuration.json +++ b/lxd/metadata/configuration.json @@ -2251,6 +2251,16 @@ "type": "string" } }, + { + "limits.cpu.pin_strategy": { + "condition": "virtual machine", + "defaultdesc": "`none`", + "liveupdate": "no", + "longdesc": "Specify the strategy for VM CPU auto pinning.\nPossible values: `none` (disables CPU auto pinning) and `auto` (enables CPU auto pinning).\n\nSee {ref}`instance-options-limits-cpu-vm` for more information.", + "shortdesc": "VM CPU auto pinning strategy", + "type": "string" + } + }, { "limits.cpu.priority": { "condition": "container", diff --git a/shared/validate/validate.go b/shared/validate/validate.go index 338be680e3ff..9adf78f97735 100644 --- a/shared/validate/validate.go +++ b/shared/validate/validate.go @@ -89,7 +89,7 @@ func IsUint32(value string) error { // ParseUint32Range parses a uint32 range in the form "number" or "start-end". // Returns the start number and the size of the range. -func ParseUint32Range(value string) (uint32, uint32, error) { +func ParseUint32Range(value string) (start uint32, rangeSize uint32, err error) { rangeParts := strings.SplitN(value, "-", 2) rangeLen := len(rangeParts) if rangeLen != 1 && rangeLen != 2 { @@ -101,7 +101,7 @@ func ParseUint32Range(value string) (uint32, uint32, error) { return 0, 0, fmt.Errorf("Invalid number %q", value) } - var rangeSize uint32 = 1 + rangeSize = 1 if rangeLen == 2 { endNum, err := strconv.ParseUint(rangeParts[1], 10, 32) @@ -116,7 +116,9 @@ func ParseUint32Range(value string) (uint32, uint32, error) { rangeSize += uint32(endNum) - uint32(startNum) } - return uint32(startNum), rangeSize, nil + start = uint32(startNum) + + return start, rangeSize, nil } // IsUint32Range validates whether the string is a uint32 range in the form "number" or "start-end". 
@@ -674,8 +676,8 @@ func IsAbsFilePath(value string) error { // ParseNetworkVLANRange parses a VLAN range in the form "number" or "start-end". // Returns the start number and the number of items in the range. -func ParseNetworkVLANRange(vlan string) (int, int, error) { - err := IsNetworkVLAN(vlan) +func ParseNetworkVLANRange(vlan string) (vlanRangeStart int, rangeSize int, err error) { + err = IsNetworkVLAN(vlan) if err == nil { vlanRangeStart, err := strconv.Atoi(vlan) if err != nil { @@ -694,7 +696,7 @@ func ParseNetworkVLANRange(vlan string) (int, int, error) { return -1, -1, fmt.Errorf("Invalid VLAN range boundary. start:%s, end:%s", vlanRange[0], vlanRange[1]) } - vlanRangeStart, err := strconv.Atoi(vlanRange[0]) + vlanRangeStart, err = strconv.Atoi(vlanRange[0]) if err != nil { return -1, -1, err } @@ -708,7 +710,9 @@ func ParseNetworkVLANRange(vlan string) (int, int, error) { return -1, -1, fmt.Errorf("Invalid VLAN range boundary. start:%d is higher than end:%d", vlanRangeStart, vlanRangeEnd) } - return vlanRangeStart, vlanRangeEnd - vlanRangeStart + 1, nil + rangeSize = vlanRangeEnd - vlanRangeStart + 1 + + return vlanRangeStart, rangeSize, nil } // IsHostname checks the string is valid DNS hostname. @@ -884,3 +888,12 @@ func IsMultipleOfUnit(unit string) func(value string) error { return nil } } + +// IsStaticCPUPinning checks whether value is a static CPU pinning specification, that is, a CPU ID list or range containing "," or "-". +func IsStaticCPUPinning(value string) error { + if strings.ContainsAny(value, ",-") { + return nil + } + + return fmt.Errorf("Invalid static CPU pinning strategy: %s", value) +} diff --git a/shared/version/api.go b/shared/version/api.go index 9bc460e5bbca..84a109007f67 100644 --- a/shared/version/api.go +++ b/shared/version/api.go @@ -422,6 +422,7 @@ var APIExtensions = []string{ "network_allocations_ovn_uplink", "network_ovn_uplink_vlan", "state_logical_cpus", + "vm_limits_cpu_pin_strategy", } // APIExtensionsCount returns the number of available API extensions. 
diff --git a/test/suites/config.sh b/test/suites/config.sh index 1c98339bdcc4..d72e29dcc080 100644 --- a/test/suites/config.sh +++ b/test/suites/config.sh @@ -181,6 +181,12 @@ test_config_profiles() { lxc profile device list onenic | grep eth0 lxc profile device show onenic | grep p2p + # test setting limits.cpu.pin_strategy at the local config level (should fail) and at the profile level + ! lxc config set c1 limits.cpu.pin_strategy=auto || false + lxc profile set default limits.cpu.pin_strategy=auto + ! lxc profile set default limits.cpu=1-2 || false # test adding a cpu limit with limits.cpu.pin_strategy set (should fail) + lxc profile unset default limits.cpu.pin_strategy + # test live-adding a nic veth_host_name="veth$$" lxc start foo