gke-cluster-autopilot: add monitoring configuration (#1646)
* gke-cluster-autopilot: add monitoring configuration block (monitoring_config)
This commit is contained in:
parent
42ebbccad9
commit
9f23d504ec
|
@ -87,6 +87,35 @@ module "cluster-1" {
|
|||
# tftest modules=1 resources=1 inventory=logging-config.yaml
|
||||
```
|
||||
|
||||
### Monitoring configuration
|
||||
|
||||
This example shows how to [configure collection of Kubernetes control plane metrics](https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#enable-control-plane-metrics). The metrics for these components are not collected by default.
|
||||
|
||||
> **Note**
|
||||
> System metrics collection is pre-configured for Autopilot clusters and cannot be disabled.
|
||||
|
||||
> **Warning**
|
||||
> GKE **workload metrics** is deprecated and has been removed in GKE 1.24 and later. Workload metrics is replaced by [Google Cloud Managed Service for Prometheus](https://cloud.google.com/stackdriver/docs/managed-prometheus), which is Google's recommended way to monitor Kubernetes applications by using Cloud Monitoring.
|
||||
|
||||
```hcl
|
||||
module "cluster-1" {
|
||||
source = "./fabric/modules/gke-cluster-autopilot"
|
||||
project_id = var.project_id
|
||||
name = "cluster-1"
|
||||
location = "europe-west1"
|
||||
vpc_config = {
|
||||
network = var.vpc.self_link
|
||||
subnetwork = var.subnet.self_link
|
||||
}
|
||||
monitoring_config = {
|
||||
enable_api_server_metrics = true
|
||||
enable_controller_manager_metrics = true
|
||||
enable_scheduler_metrics = true
|
||||
}
|
||||
}
|
||||
# tftest modules=1 resources=1 inventory=monitoring-config-control-plane.yaml
|
||||
```
|
||||
|
||||
### Backup for GKE
|
||||
|
||||
This example shows how to [enable the Backup for GKE agent and configure a Backup Plan](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/concepts/backup-for-gke) for GKE Standard clusters.
|
||||
|
@ -120,9 +149,9 @@ module "cluster-1" {
|
|||
| name | description | type | required | default |
|
||||
|---|---|:---:|:---:|:---:|
|
||||
| [location](variables.tf#L110) | Autopilot clusters are always regional. | <code>string</code> | ✓ | |
|
||||
| [name](variables.tf#L155) | Cluster name. | <code>string</code> | ✓ | |
|
||||
| [project_id](variables.tf#L181) | Cluster project id. | <code>string</code> | ✓ | |
|
||||
| [vpc_config](variables.tf#L209) | VPC-level configuration. | <code title="object({ network = string subnetwork = string master_ipv4_cidr_block = optional(string) secondary_range_blocks = optional(object({ pods = string services = string })) secondary_range_names = optional(object({ pods = string services = string }), { pods = "pods", services = "services" }) master_authorized_ranges = optional(map(string)) stack_type = optional(string) })">object({…})</code> | ✓ | |
|
||||
| [name](variables.tf#L170) | Cluster name. | <code>string</code> | ✓ | |
|
||||
| [project_id](variables.tf#L196) | Cluster project id. | <code>string</code> | ✓ | |
|
||||
| [vpc_config](variables.tf#L224) | VPC-level configuration. | <code title="object({ network = string subnetwork = string master_ipv4_cidr_block = optional(string) secondary_range_blocks = optional(object({ pods = string services = string })) secondary_range_names = optional(object({ pods = string services = string }), { pods = "pods", services = "services" }) master_authorized_ranges = optional(map(string)) stack_type = optional(string) })">object({…})</code> | ✓ | |
|
||||
| [backup_configs](variables.tf#L17) | Configuration for Backup for GKE. | <code title="object({ enable_backup_agent = optional(bool, false) backup_plans = optional(map(object({ encryption_key = optional(string) include_secrets = optional(bool, true) include_volume_data = optional(bool, true) namespaces = optional(list(string)) region = string schedule = string retention_policy_days = optional(string) retention_policy_lock = optional(bool, false) retention_policy_delete_lock_days = optional(string) })), {}) })">object({…})</code> | | <code>{}</code> |
|
||||
| [description](variables.tf#L37) | Cluster description. | <code>string</code> | | <code>null</code> |
|
||||
| [enable_addons](variables.tf#L43) | Addons enabled in the cluster (true means enabled). | <code title="object({ cloudrun = optional(bool, false) config_connector = optional(bool, false) dns_cache = optional(bool, false) horizontal_pod_autoscaling = optional(bool, false) http_load_balancing = optional(bool, false) istio = optional(object({ enable_tls = bool })) kalm = optional(bool, false) network_policy = optional(bool, false) })">object({…})</code> | | <code title="{ horizontal_pod_autoscaling = true http_load_balancing = true }">{…}</code> |
|
||||
|
@ -132,11 +161,12 @@ module "cluster-1" {
|
|||
| [logging_config](variables.tf#L115) | Logging configuration. | <code title="object({ enable_api_server_logs = optional(bool, false) enable_scheduler_logs = optional(bool, false) enable_controller_manager_logs = optional(bool, false) })">object({…})</code> | | <code>{}</code> |
|
||||
| [maintenance_config](variables.tf#L126) | Maintenance window configuration. | <code title="object({ daily_window_start_time = optional(string) recurring_window = optional(object({ start_time = string end_time = string recurrence = string })) maintenance_exclusions = optional(list(object({ name = string start_time = string end_time = string scope = optional(string) }))) })">object({…})</code> | | <code title="{ daily_window_start_time = "03:00" recurring_window = null maintenance_exclusion = [] }">{…}</code> |
|
||||
| [min_master_version](variables.tf#L149) | Minimum version of the master, defaults to the version of the most recent official release. | <code>string</code> | | <code>null</code> |
|
||||
| [node_locations](variables.tf#L160) | Zones in which the cluster's nodes are located. | <code>list(string)</code> | | <code>[]</code> |
|
||||
| [private_cluster_config](variables.tf#L167) | Private cluster configuration. | <code title="object({ enable_private_endpoint = optional(bool) master_global_access = optional(bool) peering_config = optional(object({ export_routes = optional(bool) import_routes = optional(bool) project_id = optional(string) })) })">object({…})</code> | | <code>null</code> |
|
||||
| [release_channel](variables.tf#L186) | Release channel for GKE upgrades. Clusters created in the Autopilot mode must use a release channel. Choose between \"RAPID\", \"REGULAR\", and \"STABLE\". | <code>string</code> | | <code>"REGULAR"</code> |
|
||||
| [service_account](variables.tf#L197) | The Google Cloud Platform Service Account to be used by the node VMs created by GKE Autopilot. | <code>string</code> | | <code>null</code> |
|
||||
| [tags](variables.tf#L203) | Network tags applied to nodes. | <code>list(string)</code> | | <code>null</code> |
|
||||
| [monitoring_config](variables.tf#L155) | Monitoring configuration. System metrics collection cannot be disabled for Autopilot clusters. Control plane metrics are optional. Google Cloud Managed Service for Prometheus is enabled by default. | <code title="object({ enable_api_server_metrics = optional(bool, false) enable_controller_manager_metrics = optional(bool, false) enable_scheduler_metrics = optional(bool, false) enable_managed_prometheus = optional(bool, true) })">object({…})</code> | | <code>{}</code> |
|
||||
| [node_locations](variables.tf#L175) | Zones in which the cluster's nodes are located. | <code>list(string)</code> | | <code>[]</code> |
|
||||
| [private_cluster_config](variables.tf#L182) | Private cluster configuration. | <code title="object({ enable_private_endpoint = optional(bool) master_global_access = optional(bool) peering_config = optional(object({ export_routes = optional(bool) import_routes = optional(bool) project_id = optional(string) })) })">object({…})</code> | | <code>null</code> |
|
||||
| [release_channel](variables.tf#L201) | Release channel for GKE upgrades. Clusters created in the Autopilot mode must use a release channel. Choose between \"RAPID\", \"REGULAR\", and \"STABLE\". | <code>string</code> | | <code>"REGULAR"</code> |
|
||||
| [service_account](variables.tf#L212) | The Google Cloud Platform Service Account to be used by the node VMs created by GKE Autopilot. | <code>string</code> | | <code>null</code> |
|
||||
| [tags](variables.tf#L218) | Network tags applied to nodes. | <code>list(string)</code> | | <code>null</code> |
|
||||
|
||||
## Outputs
|
||||
|
||||
|
|
|
@ -203,6 +203,20 @@ resource "google_container_cluster" "cluster" {
|
|||
}
|
||||
}
|
||||
|
||||
monitoring_config {
|
||||
enable_components = toset(compact([
|
||||
# System metrics collection cannot be disabled for Autopilot clusters.
|
||||
"SYSTEM_COMPONENTS",
|
||||
# Control plane metrics.
|
||||
var.monitoring_config.enable_api_server_metrics ? "APISERVER" : null,
|
||||
var.monitoring_config.enable_controller_manager_metrics ? "CONTROLLER_MANAGER" : null,
|
||||
var.monitoring_config.enable_scheduler_metrics ? "SCHEDULER" : null,
|
||||
]))
|
||||
managed_prometheus {
|
||||
enabled = var.monitoring_config.enable_managed_prometheus
|
||||
}
|
||||
}
|
||||
|
||||
dynamic "notification_config" {
|
||||
for_each = var.enable_features.upgrade_notifications != null ? [""] : []
|
||||
content {
|
||||
|
@ -305,7 +319,6 @@ resource "google_gke_backup_backup_plan" "backup_plan" {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
resource "google_compute_network_peering_routes_config" "gke_master" {
|
||||
count = (
|
||||
try(var.private_cluster_config.peering_config, null) != null ? 1 : 0
|
||||
|
|
|
@ -152,6 +152,21 @@ variable "min_master_version" {
|
|||
default = null
|
||||
}
|
||||
|
||||
# Monitoring settings for the cluster: three opt-in control plane metric
# toggles plus the Managed Service for Prometheus switch.
variable "monitoring_config" {
|
||||
description = "Monitoring configuration. System metrics collection cannot be disabled for Autopilot clusters. Control plane metrics are optional. Google Cloud Managed Service for Prometheus is enabled by default."
|
||||
type = object({
|
||||
# Control plane metrics: each flag defaults to false, so a component's metrics are collected only when its flag is set to true.
|
||||
enable_api_server_metrics = optional(bool, false)
|
||||
enable_controller_manager_metrics = optional(bool, false)
|
||||
enable_scheduler_metrics = optional(bool, false)
|
||||
# Google Cloud Managed Service for Prometheus (defaults to enabled).
|
||||
# GKE Autopilot clusters running GKE version 1.25 or greater must keep this enabled.
|
||||
enable_managed_prometheus = optional(bool, true)
|
||||
})
|
||||
# An empty object means every attribute takes its optional() default declared in the type.
default = {}
|
||||
# A null input falls back to the default, so the value is never null inside the module.
nullable = false
|
||||
}
|
||||
|
||||
variable "name" {
|
||||
description = "Cluster name."
|
||||
type = string
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
values:
|
||||
module.cluster-1.google_container_cluster.cluster:
|
||||
monitoring_config:
|
||||
- enable_components:
|
||||
- APISERVER
|
||||
- CONTROLLER_MANAGER
|
||||
- SCHEDULER
|
||||
- SYSTEM_COMPONENTS
|
||||
managed_prometheus:
|
||||
- enabled: true
|
||||
|
||||
counts:
|
||||
google_container_cluster: 1
|
Loading…
Reference in New Issue