From 9f23d504eca01bdedb11c48f576d0c23859ef327 Mon Sep 17 00:00:00 2001 From: Oliver Frolovs Date: Mon, 4 Sep 2023 16:43:59 +0100 Subject: [PATCH] gke-cluster-autopilot: add monitoring configuration (#1646) * gke-cluster-autopilot: add monitoring configuration block (monitoring_config) --- modules/gke-cluster-autopilot/README.md | 46 +++++++++++++++---- modules/gke-cluster-autopilot/main.tf | 15 +++++- modules/gke-cluster-autopilot/variables.tf | 15 ++++++ .../monitoring-config-control-plane.yaml | 27 +++++++++++ 4 files changed, 94 insertions(+), 9 deletions(-) create mode 100644 tests/modules/gke_cluster_autopilot/examples/monitoring-config-control-plane.yaml diff --git a/modules/gke-cluster-autopilot/README.md b/modules/gke-cluster-autopilot/README.md index c4176a1d..da639066 100644 --- a/modules/gke-cluster-autopilot/README.md +++ b/modules/gke-cluster-autopilot/README.md @@ -87,6 +87,35 @@ module "cluster-1" { # tftest modules=1 resources=1 inventory=logging-config.yaml ``` +### Monitoring configuration + +This example shows how to [configure collection of Kubernetes control plane metrics](https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#enable-control-plane-metrics). Metrics for the control plane components (API server, scheduler, and controller manager) are not collected by default. + +> **Note** +> System metrics collection is pre-configured for Autopilot clusters and cannot be disabled. + +> **Warning** +> GKE **workload metrics** is deprecated and has been removed in GKE 1.24 and later. It is replaced by [Google Cloud Managed Service for Prometheus](https://cloud.google.com/stackdriver/docs/managed-prometheus), which is Google's recommended way to monitor Kubernetes applications by using Cloud Monitoring.
+ +```hcl +module "cluster-1" { + source = "./fabric/modules/gke-cluster-autopilot" + project_id = var.project_id + name = "cluster-1" + location = "europe-west1" + vpc_config = { + network = var.vpc.self_link + subnetwork = var.subnet.self_link + } + monitoring_config = { + enable_api_server_metrics = true + enable_controller_manager_metrics = true + enable_scheduler_metrics = true + } +} +# tftest modules=1 resources=1 inventory=monitoring-config-control-plane.yaml +``` + ### Backup for GKE This example shows how to [enable the Backup for GKE agent and configure a Backup Plan](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/concepts/backup-for-gke) for GKE Standard clusters. @@ -120,9 +149,9 @@ module "cluster-1" { | name | description | type | required | default | |---|---|:---:|:---:|:---:| | [location](variables.tf#L110) | Autopilot cluster are always regional. | string | ✓ | | -| [name](variables.tf#L155) | Cluster name. | string | ✓ | | -| [project_id](variables.tf#L181) | Cluster project id. | string | ✓ | | -| [vpc_config](variables.tf#L209) | VPC-level configuration. | object({…}) | ✓ | | +| [name](variables.tf#L170) | Cluster name. | string | ✓ | | +| [project_id](variables.tf#L196) | Cluster project id. | string | ✓ | | +| [vpc_config](variables.tf#L224) | VPC-level configuration. | object({…}) | ✓ | | | [backup_configs](variables.tf#L17) | Configuration for Backup for GKE. | object({…}) | | {} | | [description](variables.tf#L37) | Cluster description. | string | | null | | [enable_addons](variables.tf#L43) | Addons enabled in the cluster (true means enabled). | object({…}) | | {…} | @@ -132,11 +161,12 @@ module "cluster-1" { | [logging_config](variables.tf#L115) | Logging configuration. | object({…}) | | {} | | [maintenance_config](variables.tf#L126) | Maintenance window configuration. 
| object({…}) | | {…} | | [min_master_version](variables.tf#L149) | Minimum version of the master, defaults to the version of the most recent official release. | string | | null | -| [node_locations](variables.tf#L160) | Zones in which the cluster's nodes are located. | list(string) | | [] | -| [private_cluster_config](variables.tf#L167) | Private cluster configuration. | object({…}) | | null | -| [release_channel](variables.tf#L186) | Release channel for GKE upgrades. Clusters created in the Autopilot mode must use a release channel. Choose between \"RAPID\", \"REGULAR\", and \"STABLE\". | string | | "REGULAR" | -| [service_account](variables.tf#L197) | The Google Cloud Platform Service Account to be used by the node VMs created by GKE Autopilot. | string | | null | -| [tags](variables.tf#L203) | Network tags applied to nodes. | list(string) | | null | +| [monitoring_config](variables.tf#L155) | Monitoring configuration. System metrics collection cannot be disabled for Autopilot clusters. Control plane metrics are optional. Google Cloud Managed Service for Prometheus is enabled by default. | object({…}) | | {} | +| [node_locations](variables.tf#L175) | Zones in which the cluster's nodes are located. | list(string) | | [] | +| [private_cluster_config](variables.tf#L182) | Private cluster configuration. | object({…}) | | null | +| [release_channel](variables.tf#L201) | Release channel for GKE upgrades. Clusters created in the Autopilot mode must use a release channel. Choose between \"RAPID\", \"REGULAR\", and \"STABLE\". | string | | "REGULAR" | +| [service_account](variables.tf#L212) | The Google Cloud Platform Service Account to be used by the node VMs created by GKE Autopilot. | string | | null | +| [tags](variables.tf#L218) | Network tags applied to nodes. 
| list(string) | | null | ## Outputs diff --git a/modules/gke-cluster-autopilot/main.tf b/modules/gke-cluster-autopilot/main.tf index 7ae4d044..330c4993 100644 --- a/modules/gke-cluster-autopilot/main.tf +++ b/modules/gke-cluster-autopilot/main.tf @@ -203,6 +203,20 @@ resource "google_container_cluster" "cluster" { } } + monitoring_config { + enable_components = toset(compact([ + # System metrics collection cannot be disabled for Autopilot clusters. + "SYSTEM_COMPONENTS", + # Control plane metrics. + var.monitoring_config.enable_api_server_metrics ? "APISERVER" : null, + var.monitoring_config.enable_controller_manager_metrics ? "CONTROLLER_MANAGER" : null, + var.monitoring_config.enable_scheduler_metrics ? "SCHEDULER" : null, + ])) + managed_prometheus { + enabled = var.monitoring_config.enable_managed_prometheus + } + } + dynamic "notification_config" { for_each = var.enable_features.upgrade_notifications != null ? [""] : [] content { @@ -305,7 +319,6 @@ resource "google_gke_backup_backup_plan" "backup_plan" { } } - resource "google_compute_network_peering_routes_config" "gke_master" { count = ( try(var.private_cluster_config.peering_config, null) != null ? 1 : 0 diff --git a/modules/gke-cluster-autopilot/variables.tf b/modules/gke-cluster-autopilot/variables.tf index 9a30b5bf..52896bbd 100644 --- a/modules/gke-cluster-autopilot/variables.tf +++ b/modules/gke-cluster-autopilot/variables.tf @@ -152,6 +152,21 @@ variable "min_master_version" { default = null } +variable "monitoring_config" { + description = "Monitoring configuration. System metrics collection cannot be disabled for Autopilot clusters. Control plane metrics are optional. Google Cloud Managed Service for Prometheus is enabled by default." 
+ type = object({ + # Control plane metrics + enable_api_server_metrics = optional(bool, false) + enable_controller_manager_metrics = optional(bool, false) + enable_scheduler_metrics = optional(bool, false) + # Google Cloud Managed Service for Prometheus + # GKE Autopilot clusters running GKE version 1.25 or greater must have this on. + enable_managed_prometheus = optional(bool, true) + }) + default = {} + nullable = false +} + variable "name" { description = "Cluster name." type = string diff --git a/tests/modules/gke_cluster_autopilot/examples/monitoring-config-control-plane.yaml b/tests/modules/gke_cluster_autopilot/examples/monitoring-config-control-plane.yaml new file mode 100644 index 00000000..b3108770 --- /dev/null +++ b/tests/modules/gke_cluster_autopilot/examples/monitoring-config-control-plane.yaml @@ -0,0 +1,27 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +values: + module.cluster-1.google_container_cluster.cluster: + monitoring_config: + - enable_components: + - APISERVER + - CONTROLLER_MANAGER + - SCHEDULER + - SYSTEM_COMPONENTS + managed_prometheus: + - enabled: true + +counts: + google_container_cluster: 1