Adding placement_policy for GKE nodepools (e.g. GPU compact placement or TPU topology) (#2405)
* Adding placement policy to gke-nodepool module
* Adding placement policy for GKE nodepool
* updated README
* variables for placement_policy
* formatting
* Updated README
* fixing typo
* removing useless try() calls

---------

Co-authored-by: Aurélien Legrand <legranda@google.com>
This commit is contained in:
parent
287fee275c
commit
1f07cb72f2
|
@ -143,7 +143,7 @@ module "cluster-1-nodepool-gpu-1" {
|
||||||
|---|---|:---:|:---:|:---:|
|
|---|---|:---:|:---:|:---:|
|
||||||
| [cluster_name](variables.tf#L23) | Cluster name. | <code>string</code> | ✓ | |
|
| [cluster_name](variables.tf#L23) | Cluster name. | <code>string</code> | ✓ | |
|
||||||
| [location](variables.tf#L48) | Cluster location. | <code>string</code> | ✓ | |
|
| [location](variables.tf#L48) | Cluster location. | <code>string</code> | ✓ | |
|
||||||
| [project_id](variables.tf#L177) | Cluster project id. | <code>string</code> | ✓ | |
|
| [project_id](variables.tf#L181) | Cluster project id. | <code>string</code> | ✓ | |
|
||||||
| [cluster_id](variables.tf#L17) | Cluster id. Optional, but providing cluster_id is recommended to prevent cluster misconfiguration in some of the edge cases. | <code>string</code> | | <code>null</code> |
|
| [cluster_id](variables.tf#L17) | Cluster id. Optional, but providing cluster_id is recommended to prevent cluster misconfiguration in some of the edge cases. | <code>string</code> | | <code>null</code> |
|
||||||
| [gke_version](variables.tf#L28) | Kubernetes nodes version. Ignored if auto_upgrade is set in management_config. | <code>string</code> | | <code>null</code> |
|
| [gke_version](variables.tf#L28) | Kubernetes nodes version. Ignored if auto_upgrade is set in management_config. | <code>string</code> | | <code>null</code> |
|
||||||
| [k8s_labels](variables.tf#L34) | Kubernetes labels applied to each node. | <code>map(string)</code> | | <code>{}</code> |
|
| [k8s_labels](variables.tf#L34) | Kubernetes labels applied to each node. | <code>map(string)</code> | | <code>{}</code> |
|
||||||
|
@ -153,13 +153,13 @@ module "cluster-1-nodepool-gpu-1" {
|
||||||
| [node_config](variables.tf#L65) | Node-level configuration. | <code title="object({ boot_disk_kms_key = optional(string) disk_size_gb = optional(number) disk_type = optional(string) ephemeral_ssd_count = optional(number) gcfs = optional(bool, false) guest_accelerator = optional(object({ count = number type = string gpu_driver = optional(object({ version = string partition_size = optional(string) max_shared_clients_per_gpu = optional(number) })) })) local_nvme_ssd_count = optional(number) gvnic = optional(bool, false) image_type = optional(string) kubelet_config = optional(object({ cpu_manager_policy = string cpu_cfs_quota = optional(bool) cpu_cfs_quota_period = optional(string) pod_pids_limit = optional(number) })) linux_node_config = optional(object({ sysctls = optional(map(string)) cgroup_mode = optional(string) })) local_ssd_count = optional(number) machine_type = optional(string) metadata = optional(map(string)) min_cpu_platform = optional(string) preemptible = optional(bool) sandbox_config_gvisor = optional(bool) shielded_instance_config = optional(object({ enable_integrity_monitoring = optional(bool) enable_secure_boot = optional(bool) })) spot = optional(bool) workload_metadata_config_mode = optional(string) })">object({…})</code> | | <code title="{ disk_type = "pd-balanced" }">{…}</code> |
|
| [node_config](variables.tf#L65) | Node-level configuration. | <code title="object({ boot_disk_kms_key = optional(string) disk_size_gb = optional(number) disk_type = optional(string) ephemeral_ssd_count = optional(number) gcfs = optional(bool, false) guest_accelerator = optional(object({ count = number type = string gpu_driver = optional(object({ version = string partition_size = optional(string) max_shared_clients_per_gpu = optional(number) })) })) local_nvme_ssd_count = optional(number) gvnic = optional(bool, false) image_type = optional(string) kubelet_config = optional(object({ cpu_manager_policy = string cpu_cfs_quota = optional(bool) cpu_cfs_quota_period = optional(string) pod_pids_limit = optional(number) })) linux_node_config = optional(object({ sysctls = optional(map(string)) cgroup_mode = optional(string) })) local_ssd_count = optional(number) machine_type = optional(string) metadata = optional(map(string)) min_cpu_platform = optional(string) preemptible = optional(bool) sandbox_config_gvisor = optional(bool) shielded_instance_config = optional(object({ enable_integrity_monitoring = optional(bool) enable_secure_boot = optional(bool) })) spot = optional(bool) workload_metadata_config_mode = optional(string) })">object({…})</code> | | <code title="{ disk_type = "pd-balanced" }">{…}</code> |
|
||||||
| [node_count](variables.tf#L124) | Number of nodes per instance group. Initial value can only be changed by recreation, current is ignored when autoscaling is used. | <code title="object({ current = optional(number) initial = number })">object({…})</code> | | <code title="{ initial = 1 }">{…}</code> |
|
| [node_count](variables.tf#L124) | Number of nodes per instance group. Initial value can only be changed by recreation, current is ignored when autoscaling is used. | <code title="object({ current = optional(number) initial = number })">object({…})</code> | | <code title="{ initial = 1 }">{…}</code> |
|
||||||
| [node_locations](variables.tf#L136) | Node locations. | <code>list(string)</code> | | <code>null</code> |
|
| [node_locations](variables.tf#L136) | Node locations. | <code>list(string)</code> | | <code>null</code> |
|
||||||
| [nodepool_config](variables.tf#L142) | Nodepool-level configuration. | <code title="object({ autoscaling = optional(object({ location_policy = optional(string) max_node_count = optional(number) min_node_count = optional(number) use_total_nodes = optional(bool, false) })) management = optional(object({ auto_repair = optional(bool) auto_upgrade = optional(bool) })) upgrade_settings = optional(object({ max_surge = number max_unavailable = number })) })">object({…})</code> | | <code>null</code> |
|
| [nodepool_config](variables.tf#L142) | Nodepool-level configuration. | <code title="object({ autoscaling = optional(object({ location_policy = optional(string) max_node_count = optional(number) min_node_count = optional(number) use_total_nodes = optional(bool, false) })) management = optional(object({ auto_repair = optional(bool) auto_upgrade = optional(bool) })) placement_policy = optional(object({ type = string policy_name = optional(string) tpu_topology = optional(string) })) upgrade_settings = optional(object({ max_surge = number max_unavailable = number })) })">object({…})</code> | | <code>null</code> |
|
||||||
| [pod_range](variables.tf#L164) | Pod secondary range configuration. | <code title="object({ secondary_pod_range = object({ name = string cidr = optional(string) create = optional(bool) enable_private_nodes = optional(bool) }) })">object({…})</code> | | <code>null</code> |
|
| [pod_range](variables.tf#L168) | Pod secondary range configuration. | <code title="object({ secondary_pod_range = object({ name = string cidr = optional(string) create = optional(bool) enable_private_nodes = optional(bool) }) })">object({…})</code> | | <code>null</code> |
|
||||||
| [reservation_affinity](variables.tf#L182) | Configuration of the desired reservation which instances could take capacity from. | <code title="object({ consume_reservation_type = string key = optional(string) values = optional(list(string)) })">object({…})</code> | | <code>null</code> |
|
| [reservation_affinity](variables.tf#L186) | Configuration of the desired reservation which instances could take capacity from. | <code title="object({ consume_reservation_type = string key = optional(string) values = optional(list(string)) })">object({…})</code> | | <code>null</code> |
|
||||||
| [service_account](variables.tf#L192) | Nodepool service account. If this variable is set to null, the default GCE service account will be used. If set and email is null, a service account will be created. If scopes are null a default will be used. | <code title="object({ create = optional(bool, false) email = optional(string) oauth_scopes = optional(list(string)) })">object({…})</code> | | <code>{}</code> |
|
| [service_account](variables.tf#L196) | Nodepool service account. If this variable is set to null, the default GCE service account will be used. If set and email is null, a service account will be created. If scopes are null a default will be used. | <code title="object({ create = optional(bool, false) email = optional(string) oauth_scopes = optional(list(string)) })">object({…})</code> | | <code>{}</code> |
|
||||||
| [sole_tenant_nodegroup](variables.tf#L203) | Sole tenant node group. | <code>string</code> | | <code>null</code> |
|
| [sole_tenant_nodegroup](variables.tf#L207) | Sole tenant node group. | <code>string</code> | | <code>null</code> |
|
||||||
| [tags](variables.tf#L209) | Network tags applied to nodes. | <code>list(string)</code> | | <code>null</code> |
|
| [tags](variables.tf#L213) | Network tags applied to nodes. | <code>list(string)</code> | | <code>null</code> |
|
||||||
| [taints](variables.tf#L215) | Kubernetes taints applied to all nodes. | <code title="map(object({ value = string effect = string }))">map(object({…}))</code> | | <code>{}</code> |
|
| [taints](variables.tf#L219) | Kubernetes taints applied to all nodes. | <code title="map(object({ value = string effect = string }))">map(object({…}))</code> | | <code>{}</code> |
|
||||||
|
|
||||||
## Outputs
|
## Outputs
|
||||||
|
|
||||||
|
|
|
@ -77,7 +77,6 @@ resource "google_container_node_pool" "nodepool" {
|
||||||
initial_node_count = var.node_count.initial
|
initial_node_count = var.node_count.initial
|
||||||
node_count = var.node_count.current
|
node_count = var.node_count.current
|
||||||
node_locations = var.node_locations
|
node_locations = var.node_locations
|
||||||
# placement_policy = var.nodepool_config.placement_policy
|
|
||||||
|
|
||||||
dynamic "autoscaling" {
|
dynamic "autoscaling" {
|
||||||
for_each = (
|
for_each = (
|
||||||
|
@ -129,6 +128,15 @@ resource "google_container_node_pool" "nodepool" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dynamic "placement_policy" {
|
||||||
|
for_each = try(var.nodepool_config.placement_policy, null) != null ? [""] : []
|
||||||
|
content {
|
||||||
|
type = var.nodepool_config.placement_policy.type
|
||||||
|
policy_name = var.nodepool_config.placement_policy.policy_name
|
||||||
|
tpu_topology = var.nodepool_config.placement_policy.tpu_topology
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
node_config {
|
node_config {
|
||||||
boot_disk_kms_key = var.node_config.boot_disk_kms_key
|
boot_disk_kms_key = var.node_config.boot_disk_kms_key
|
||||||
disk_size_gb = var.node_config.disk_size_gb
|
disk_size_gb = var.node_config.disk_size_gb
|
||||||
|
|
|
@ -152,7 +152,11 @@ variable "nodepool_config" {
|
||||||
auto_repair = optional(bool)
|
auto_repair = optional(bool)
|
||||||
auto_upgrade = optional(bool)
|
auto_upgrade = optional(bool)
|
||||||
}))
|
}))
|
||||||
# placement_policy = optional(bool)
|
placement_policy = optional(object({
|
||||||
|
type = string
|
||||||
|
policy_name = optional(string)
|
||||||
|
tpu_topology = optional(string)
|
||||||
|
}))
|
||||||
upgrade_settings = optional(object({
|
upgrade_settings = optional(object({
|
||||||
max_surge = number
|
max_surge = number
|
||||||
max_unavailable = number
|
max_unavailable = number
|
||||||
|
|
Loading…
Reference in New Issue