diff --git a/aws/infrastructure.tf b/aws/infrastructure.tf index 3ed1eaeb..db7094b8 100644 --- a/aws/infrastructure.tf +++ b/aws/infrastructure.tf @@ -204,10 +204,11 @@ locals { prefix = values.prefix tags = values.tags specs = { - cpus = data.aws_ec2_instance_type.instance_type[values.prefix].default_vcpus - ram = data.aws_ec2_instance_type.instance_type[values.prefix].memory_size - gpus = try(one(data.aws_ec2_instance_type.instance_type[values.prefix].gpus).count, 0) - mig = lookup(values, "mig", null) + cpus = data.aws_ec2_instance_type.instance_type[values.prefix].default_vcpus + ram = data.aws_ec2_instance_type.instance_type[values.prefix].memory_size + gpus = try(one(data.aws_ec2_instance_type.instance_type[values.prefix].gpus).count, 0) + mig = lookup(values, "mig", null) + shard = lookup(values, "shard", null) } } } diff --git a/azure/infrastructure.tf b/azure/infrastructure.tf index 7c27b4b3..f66373e8 100644 --- a/azure/infrastructure.tf +++ b/azure/infrastructure.tf @@ -167,10 +167,11 @@ locals { prefix = values.prefix tags = values.tags specs = { - cpus = local.vmsizes[values.type].vcpus - ram = local.vmsizes[values.type].ram - gpus = local.vmsizes[values.type].gpus - mig = lookup(values, "mig", null) + cpus = local.vmsizes[values.type].vcpus + ram = local.vmsizes[values.type].ram + gpus = local.vmsizes[values.type].gpus + mig = lookup(values, "mig", null) + shard = lookup(values, "shard", null) } } } diff --git a/common/configuration/main.tf b/common/configuration/main.tf index c8c544cd..057f1ded 100644 --- a/common/configuration/main.tf +++ b/common/configuration/main.tf @@ -167,4 +167,4 @@ output "bastions" { for host, values in var.inventory: host => values if contains(values.tags, var.bastion_tag) && contains(values.tags, "public") && (!contains(values.tags, "pool")) } -} \ No newline at end of file +} diff --git a/docs/README.md b/docs/README.md index a4897c3e..05accde1 100644 --- a/docs/README.md +++ b/docs/README.md @@ -560,6 +560,7 @@ the 
operating system and service software ``` This is only functional with [MIG supported GPUs](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#supported-gpus), and with x86-64 processors (see [NVIDIA/mig-parted issue #30](https://github.com/NVIDIA/mig-parted/issues/30)). +6. `shard`: total number of [shards](https://slurm.schedmd.com/gres.html#Sharding) to create on the node. Sharding allows multiple jobs to share the same GPU. The total number of shards is evenly distributed across all GPUs on the node. For some cloud providers, it possible to define additional attributes. The following sections present the available attributes per provider. diff --git a/gcp/infrastructure.tf b/gcp/infrastructure.tf index 6355c0aa..5e2d2622 100644 --- a/gcp/infrastructure.tf +++ b/gcp/infrastructure.tf @@ -177,10 +177,11 @@ locals { prefix = values.prefix tags = values.tags specs = { - cpus = data.external.machine_type[values["prefix"]].result["vcpus"] - ram = data.external.machine_type[values["prefix"]].result["ram"] - gpus = try(data.external.machine_type[values["prefix"]].result["gpus"], lookup(values, "gpu_count", 0)) - mig = lookup(values, "mig", null) + cpus = data.external.machine_type[values["prefix"]].result["vcpus"] + ram = data.external.machine_type[values["prefix"]].result["ram"] + gpus = try(data.external.machine_type[values["prefix"]].result["gpus"], lookup(values, "gpu_count", 0)) + mig = lookup(values, "mig", null) + shard = lookup(values, "shard", null) } } } diff --git a/openstack/infrastructure.tf b/openstack/infrastructure.tf index 7a062505..6b0bf354 100644 --- a/openstack/infrastructure.tf +++ b/openstack/infrastructure.tf @@ -131,13 +131,14 @@ locals { prefix = values.prefix tags = values.tags specs = { - cpus = data.openstack_compute_flavor_v2.flavors[values.prefix].vcpus - ram = data.openstack_compute_flavor_v2.flavors[values.prefix].ram - gpus = sum([ + cpus = data.openstack_compute_flavor_v2.flavors[values.prefix].vcpus + ram = 
data.openstack_compute_flavor_v2.flavors[values.prefix].ram + gpus = sum([ parseint(lookup(data.openstack_compute_flavor_v2.flavors[values.prefix].extra_specs, "resources:VGPU", "0"), 10), parseint(split(":", lookup(data.openstack_compute_flavor_v2.flavors[values.prefix].extra_specs, "pci_passthrough:alias", "gpu:0"))[1], 10) ]) - mig = lookup(values, "mig", null) + mig = lookup(values, "mig", null) + shard = lookup(values, "shard", null) } } } @@ -146,4 +147,4 @@ locals { host => merge(module.configuration.inventory[host], {id=openstack_compute_instance_v2.instances[host].id}) if contains(module.configuration.inventory[host].tags, "public") } -} \ No newline at end of file +}