diff --git a/.gitignore b/.gitignore index 91778178..ed88b00d 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,8 @@ blueprints/apigee/hybrid-gke/apiproxy.zip blueprints/apigee/hybrid-gke/deploy-apiproxy.sh blueprints/apigee/hybrid-gke/ansible/gssh.sh blueprints/apigee/hybrid-gke/ansible/vars/vars.yaml +blueprints/gke/autopilot/ansible/gssh.sh +blueprints/gke/autopilot/ansible/vars/vars.yaml +blueprints/gke/autopilot/bundle/monitoring/kustomization.yaml +blueprints/gke/autopilot/bundle/locust/kustomization.yaml +blueprints/gke/autopilot/bundle.tar.gz \ No newline at end of file diff --git a/blueprints/README.md b/blueprints/README.md index a9867ce7..24610e1b 100644 --- a/blueprints/README.md +++ b/blueprints/README.md @@ -8,7 +8,7 @@ Currently available blueprints: - **cloud operations** - [Active Directory Federation Services](./cloud-operations/adfs), [Cloud Asset Inventory feeds for resource change tracking and remediation](./cloud-operations/asset-inventory-feed-remediation), [Fine-grained Cloud DNS IAM via Service Directory](./cloud-operations/dns-fine-grained-iam), [Cloud DNS & Shared VPC design](./cloud-operations/dns-shared-vpc), [Delegated Role Grants](./cloud-operations/iam-delegated-role-grants), [Networking Dashboard](./cloud-operations/network-dashboard), [Managing on-prem service account keys by uploading public keys](./cloud-operations/onprem-sa-key-management), [Compute Image builder with Hashicorp Packer](./cloud-operations/packer-image-builder), [Packer example](./cloud-operations/packer-image-builder/packer), [Compute Engine quota monitoring](./cloud-operations/quota-monitoring), [Scheduled Cloud Asset Inventory Export to Bigquery](./cloud-operations/scheduled-asset-inventory-export-bq), [Configuring workload identity federation with Terraform Cloud/Enterprise workflows](./cloud-operations/terraform-cloud-dynamic-credentials), [TCP healthcheck and restart for unmanaged GCE instances](./cloud-operations/unmanaged-instances-healthcheck), 
[Migrate for Compute Engine (v5) blueprints](./cloud-operations/vm-migration), [Configuring workload identity federation to access Google Cloud resources from apps running on Azure](./cloud-operations/workload-identity-federation) - **data solutions** - [GCE and GCS CMEK via centralized Cloud KMS](./data-solutions/cmek-via-centralized-kms), [Cloud Composer version 2 private instance, supporting Shared VPC and external CMEK key](./data-solutions/composer-2), [Cloud SQL instance with multi-region read replicas](./data-solutions/cloudsql-multiregion), [Data Platform](./data-solutions/data-platform-foundations), [Spinning up a foundation data pipeline on Google Cloud using Cloud Storage, Dataflow and BigQuery](./data-solutions/gcs-to-bq-with-least-privileges), [#SQL Server Always On Groups blueprint](./data-solutions/sqlserver-alwayson), [Data Playground](./data-solutions/data-playground), [MLOps with Vertex AI](./data-solutions/vertex-mlops), [Shielded Folder](./data-solutions/shielded-folder), [BigQuery ML and Vertex AI Pipeline](./data-solutions/bq-ml) - **factories** - [The why and the how of Resource Factories](./factories), [Google Cloud Identity Group Factory](./factories/cloud-identity-group-factory), [Google Cloud BQ Factory](./factories/bigquery-factory), [Google Cloud VPC Firewall Factory](./factories/net-vpc-firewall-yaml), [Minimal Project Factory](./factories/project-factory) -- **GKE** - [Binary Authorization Pipeline Blueprint](./gke/binauthz), [Storage API](./gke/binauthz/image), [Multi-cluster mesh on GKE (fleet API)](./gke/multi-cluster-mesh-gke-fleet-api), [GKE Multitenant Blueprint](./gke/multitenant-fleet), [Shared VPC with GKE support](./networking/shared-vpc-gke/) +- **GKE** - [Binary Authorization Pipeline Blueprint](./gke/binauthz), [Storage API](./gke/binauthz/image), [Multi-cluster mesh on GKE (fleet API)](./gke/multi-cluster-mesh-gke-fleet-api), [GKE Multitenant Blueprint](./gke/multitenant-fleet), [Shared VPC with GKE 
support](./networking/shared-vpc-gke/), [GKE Autopilot](./gke/autopilot) - **networking** - [Calling a private Cloud Function from On-premises](./networking/private-cloud-function-from-onprem), [Decentralized firewall management](./networking/decentralized-firewall), [Decentralized firewall validator](./networking/decentralized-firewall/validator), [Network filtering with Squid](./networking/filtering-proxy), [GLB and multi-regional daisy-chaining through hybrid NEGs](./networking/glb-hybrid-neg-internal), [Hybrid connectivity to on-premise services through PSC](./networking/psc-hybrid), [HTTP Load Balancer with Cloud Armor](./networking/glb-and-armor), [Hub and Spoke via VPN](./networking/hub-and-spoke-vpn), [Hub and Spoke via VPC Peering](./networking/hub-and-spoke-peering), [Internal Load Balancer as Next Hop](./networking/ilb-next-hop), [Network filtering with Squid with isolated VPCs using Private Service Connect](./networking/filtering-proxy-psc), On-prem DNS and Google Private Access, [PSC Producer](./networking/psc-hybrid/psc-producer), [PSC Consumer](./networking/psc-hybrid/psc-consumer), [Shared VPC with optional GKE cluster](./networking/shared-vpc-gke) - **serverless** - [Creating multi-region deployments for API Gateway](./serverless/api-gateway), [Cloud Run series](./serverless/cloud-run-explore) - **third party solutions** - [OpenShift on GCP user-provisioned infrastructure](./third-party-solutions/openshift), [Wordpress deployment on Cloud Run](./third-party-solutions/wordpress/cloudrun) diff --git a/blueprints/data-solutions/bq-ml/README.md b/blueprints/data-solutions/bq-ml/README.md index 39402b91..e1a114d8 100644 --- a/blueprints/data-solutions/bq-ml/README.md +++ b/blueprints/data-solutions/bq-ml/README.md @@ -98,5 +98,5 @@ module "test" { prefix = "prefix" } -# tftest modules=9 resources=46 +# tftest modules=9 resources=47 ``` diff --git a/blueprints/data-solutions/bq-ml/demo/bmql_pipeline.ipynb 
b/blueprints/data-solutions/bq-ml/demo/bmql_pipeline.ipynb index 4d3f5b53..aa494da5 100644 --- a/blueprints/data-solutions/bq-ml/demo/bmql_pipeline.ipynb +++ b/blueprints/data-solutions/bq-ml/demo/bmql_pipeline.ipynb @@ -73,15 +73,24 @@ "metadata": {}, "outputs": [], "source": [ + "# Set your variables\n", + "PREFIX = 'your-prefix'\n", + "PROJECT_ID = 'your-project-id'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DATASET = \"{}_data\".format(PREFIX.replace(\"-\",\"_\")) \n", "EXPERIMENT_NAME = 'bqml-experiment'\n", "ENDPOINT_DISPLAY_NAME = 'bqml-endpoint'\n", - "DATASET = \"{}_data\".format(PREFIX.replace(\"-\",\"_\")) \n", "LOCATION = 'US'\n", "MODEL_NAME = 'bqml-model'\n", "PIPELINE_NAME = 'bqml-vertex-pipeline'\n", "PIPELINE_ROOT = f\"gs://{PREFIX}-data\"\n", - "PREFIX = 'your-prefix'\n", - "PROJECT_ID = 'your-project-id'\n", "REGION = 'us-central1'\n", "SERVICE_ACCOUNT = f\"vertex-sa@{PROJECT_ID}.iam.gserviceaccount.com\"" ] @@ -227,7 +236,6 @@ " project=project_id,\n", " location=location,\n", " query=features_query.format(dataset=dataset, project_id=project_id),\n", - " #job_configuration_query = {\"writeDisposition\": \"WRITE_TRUNCATE\"} #, \"destinationTable\":{\"projectId\":project_id,\"datasetId\":dataset,\"tableId\":\"ecommerce_abt_table\"}} #{\"destinationTable\":{\"projectId\":\"project_id\",\"datasetId\":dataset,\"tableId\":\"ecommerce_abt_table\"}}, #\"writeDisposition\": \"WRITE_TRUNCATE\", \n", "\n", " ).after(create_dataset)\n", "\n", @@ -408,9 +416,6 @@ "source": [ "# batch prediction on BigQuery\n", "\n", - "with open(\"sql/explain_predict.sql\") as file:\n", - " explain_predict_query = file.read()\n", - "\n", "client = bigquery_client = bigquery.Client(location=LOCATION, project=PROJECT_ID)\n", "batch_predictions = bigquery_client.query(\n", " explain_predict_query.format(\n", diff --git a/blueprints/data-solutions/data-playground/README.md 
b/blueprints/data-solutions/data-playground/README.md index 2e719445..6691e496 100644 --- a/blueprints/data-solutions/data-playground/README.md +++ b/blueprints/data-solutions/data-playground/README.md @@ -17,30 +17,35 @@ This sample creates several distinct groups of resources: - One BigQuery dataset ## Virtual Private Cloud (VPC) design + As is often the case in real-world configurations, this blueprint accepts as input an existing Shared-VPC via the network_config variable. Make sure that 'container.googleapis.com', 'notebooks.googleapis.com' and 'servicenetworking.googleapis.com' are enabled in the VPC host project. If the network_config variable is not provided, one VPC will be created in each project that supports network resources (load, transformation and orchestration). ## Deploy your enviroment + We assume the identiy running the following steps has the following role: - resourcemanager.projectCreator in case a new project will be created. - owner on the project in case you use an existing project. Run Terraform init: + ``` -$ terraform init +terraform init ``` Configure the Terraform variable in your terraform.tfvars file. You need to spefify at least the following variables: + ``` prefix = "prefix" project_id = "data-001" ``` You can run now: + ``` -$ terraform apply +terraform apply ``` You can now connect to the Vertex AI notbook to perform your data analysy. 
@@ -81,5 +86,5 @@ module "test" { parent = "folders/467898377" } } -# tftest modules=8 resources=39 +# tftest modules=8 resources=40 ``` diff --git a/blueprints/data-solutions/vertex-mlops/README.md b/blueprints/data-solutions/vertex-mlops/README.md index 8bb3043e..a204fee8 100644 --- a/blueprints/data-solutions/vertex-mlops/README.md +++ b/blueprints/data-solutions/vertex-mlops/README.md @@ -1,20 +1,23 @@ # MLOps with Vertex AI ## Introduction -This example implements the infrastructure required to deploy an end-to-end [MLOps process](https://services.google.com/fh/files/misc/practitioners_guide_to_mlops_whitepaper.pdf) using [Vertex AI](https://cloud.google.com/vertex-ai) platform. -## GCP resources +This example implements the infrastructure required to deploy an end-to-end [MLOps process](https://services.google.com/fh/files/misc/practitioners_guide_to_mlops_whitepaper.pdf) using [Vertex AI](https://cloud.google.com/vertex-ai) platform. + +## GCP resources + The blueprint will deploy all the required resources to have a fully functional MLOPs environment containing: + - Vertex Workbench (for the experimentation environment) - GCP Project (optional) to host all the resources -- Isolated VPC network and a subnet to be used by Vertex and Dataflow. Alternatively, an external Shared VPC can be configured using the `network_config`variable. +- Isolated VPC network and a subnet to be used by Vertex and Dataflow. Alternatively, an external Shared VPC can be configured using the `network_config`variable. - Firewall rule to allow the internal subnet communication required by Dataflow - Cloud NAT required to reach the internet from the different computing resources (Vertex and Dataflow) - GCS buckets to host Vertex AI and Cloud Build Artifacts. By default the buckets will be regional and should match the Vertex AI region for the different resources (i.e. Vertex Managed Dataset) and processes (i.e. 
Vertex trainining) - BigQuery Dataset where the training data will be stored. This is optional, since the training data could be already hosted in an existing BigQuery dataset. - Artifact Registry Docker repository to host the custom images. - Service account (`mlops-[env]@`) with the minimum permissions required by Vertex AI and Dataflow (if this service is used inside of the Vertex AI Pipeline). -- Service account (`github@`) to be used by Workload Identity Federation, to federate Github identity (Optional). +- Service account (`github@`) to be used by Workload Identity Federation, to federate Github identity (Optional). - Secret to store the Github SSH key to get access the CICD code repo. ![MLOps project description](./images/mlops_projects.png "MLOps project description") @@ -28,13 +31,14 @@ Assign roles relying on User groups is a way to decouple the final set of permis We use the following groups to control access to resources: - *Data Scientits* (gcp-ml-ds@). They manage notebooks and create ML pipelines. -- *ML Engineers* (gcp-ml-eng@). They manage the different Vertex resources. -- *ML Viewer* (gcp-ml-eng@). Group with wiewer permission for the different resources. +- *ML Engineers* (gcp-ml-eng@). They manage the different Vertex resources. +- *ML Viewer* (gcp-ml-eng@). Group with wiewer permission for the different resources. Please note that these groups are not suitable for production grade environments. Roles can be customized in the `main.tf`file. -## Instructions -### Deploy the experimentation environment +## Instructions + +### Deploy the experimentation environment - Create a `terraform.tfvars` file and specify the variables to match your desired configuration. You can use the provided `terraform.tfvars.sample` as reference. 
- Run `terraform init` and `terraform apply` @@ -76,6 +80,7 @@ This blueprint can be used as a building block for setting up an end2end ML Ops ## TODO + - Add support for User Managed Notebooks, SA permission option and non default SA for Single User mode. - Improve default naming for local VPC and Cloud NAT @@ -105,5 +110,5 @@ module "test" { parent = "folders/111111111111" } } -# tftest modules=12 resources=56 +# tftest modules=12 resources=57 ``` diff --git a/blueprints/gke/README.md b/blueprints/gke/README.md index 30418ca4..acba2a8f 100644 --- a/blueprints/gke/README.md +++ b/blueprints/gke/README.md @@ -21,6 +21,7 @@ They are meant to be used as minimal but complete starting points to create actu ### Multitenant GKE fleet This [blueprint](./multitenant-fleet/) allows simple centralized management of similar sets of GKE clusters and their nodepools in a single project, and optional fleet management via GKE Hub templated configurations. +
### Shared VPC with GKE and per-subnet support @@ -30,3 +31,9 @@ They are meant to be used as minimal but complete starting points to create actu It is meant to be used as a starting point for most Shared VPC configurations, and to be integrated to the above blueprints where Shared VPC is needed in more complex network topologies.
+ +### Autopilot + + This [blueprint](./autopilot) creates an Autopilot cluster with Google-managed Prometheus enabled and installs an application that scales as the traffic that is hitting the load balancer exposing it grows. + +
diff --git a/blueprints/gke/autopilot/README.md b/blueprints/gke/autopilot/README.md new file mode 100644 index 00000000..02178e5f --- /dev/null +++ b/blueprints/gke/autopilot/README.md @@ -0,0 +1,94 @@ +# Load testing an application running on an autopilot cluster + +This blueprint creates an Autopilot cluster with Google-managed Prometheus enabled and installs an application that scales as the traffic that is hitting the load balancer exposing it grows. It also installs the tooling required to run distributed load tests with [locust](https://locust.io) on that application and the monitoring tooling required to observe how things evolve in the cluster during the load test. Ansible is used to install the application and all the tooling on a management VM. + +The diagram below depicts the architecture. + +![Diagram](./diagram.png) + +## Running the blueprint + +1. Clone this repository or [open it in cloud shell](https://ssh.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2Fterraform-google-modules%2Fcloud-foundation-fabric&cloudshell_print=cloud-shell-readme.txt&cloudshell_working_dir=blueprints%2Fgke%2Fautopilot), then go through the following steps to create resources: + +2. Initialize the terraform configuration + + ``` + terraform init + ``` + +3. Apply the terraform configuration + + ``` + terraform apply -var project_id=my-project-id + ``` + +4. Copy the IP addresses for grafana and the locust master. + +5. Change to the ansible directory and run the following command + + ``` + ansible-playbook -v playbook.yaml + ``` + +6. Open the locust master web interface URL in your browser and start the load test + + +7. SSH to the management VM + + ``` + gcloud compute ssh mgmt --project my-project-id + ``` + +8. Run the following command to check that the application pods are running on different nodes than the load testing and monitoring tooling. + + ``` + kubectl get pods -A -o wide + ``` + +9. 
Run the following command to see how the application pods scale + + ``` + kubectl get hpa -n sample -w + ``` + +10. Run the following command to see how the cluster nodes scale + + ``` + kubectl get nodes -w + ``` + +Alternatively you can also check all the above using the dashboards available in grafana. + + +## Variables + +| name | description | type | required | default | +|---|---|:---:|:---:|:---:| +| [project_id](variables.tf#L75) | Project ID. | string | ✓ | | +| [cluster_network_config](variables.tf#L17) | Cluster network configuration. | object({…}) | | {…} | +| [mgmt_server_config](variables.tf#L37) | Management server configuration. | object({…}) | | {…} | +| [mgmt_subnet_cidr_block](variables.tf#L53) | Management subnet IP CIDR range. | string | | "10.0.2.0/24" | +| [network](variables.tf#L59) | VPC name. | string | | "vpc" | +| [project_create](variables.tf#L66) | Parameters for the creation of the new project. | object({…}) | | null | +| [region](variables.tf#L80) | Region. | string | | "europe-west1" | + +## Outputs + +| name | description | sensitive | +|---|---|:---:| +| [urls](outputs.tf#L17) | Grafanam, locust and application URLs. | | + + +## Test + +```hcl +module "test" { + source = "./fabric/blueprints/gke/autopilot" + project_create = { + billing_account_id = "12345-12345-12345" + parent = "folders/123456789" + } + project_id = "my-project" +} +# tftest modules=10 resources=30 +``` \ No newline at end of file diff --git a/blueprints/gke/autopilot/ansible.tf b/blueprints/gke/autopilot/ansible.tf new file mode 100644 index 00000000..393d6009 --- /dev/null +++ b/blueprints/gke/autopilot/ansible.tf @@ -0,0 +1,37 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +# tfdoc:file:description Ansible generated files. + +resource "local_file" "vars_file" { + content = yamlencode({ + cluster = module.cluster.name + region = var.region + project_id = module.project.project_id + app_url = local.urls["app"] + }) + filename = "${path.module}/ansible/vars/vars.yaml" + file_permission = "0666" +} + +resource "local_file" "gssh_file" { + content = templatefile("${path.module}/templates/gssh.sh.tpl", { + project_id = module.project.project_id + zone = local.zone + }) + filename = "${path.module}/ansible/gssh.sh" + file_permission = "0777" +} diff --git a/blueprints/gke/autopilot/ansible/ansible.cfg b/blueprints/gke/autopilot/ansible/ansible.cfg new file mode 100644 index 00000000..654f1729 --- /dev/null +++ b/blueprints/gke/autopilot/ansible/ansible.cfg @@ -0,0 +1,8 @@ +[defaults] +inventory = inventory/hosts.ini +timeout = 900 + +[ssh_connection] +pipelining = True +ssh_executable = ./gssh.sh +transfer_method = piped \ No newline at end of file diff --git a/blueprints/gke/autopilot/ansible/inventory/hosts.ini b/blueprints/gke/autopilot/ansible/inventory/hosts.ini new file mode 100644 index 00000000..842da83f --- /dev/null +++ b/blueprints/gke/autopilot/ansible/inventory/hosts.ini @@ -0,0 +1 @@ +mgmt \ No newline at end of file diff --git a/blueprints/gke/autopilot/ansible/playbook.yaml b/blueprints/gke/autopilot/ansible/playbook.yaml new file mode 100644 index 00000000..ad30859c --- /dev/null +++ b/blueprints/gke/autopilot/ansible/playbook.yaml @@ -0,0 +1,128 @@ +# Copyright 2022 Google LLC +# +# Licensed under 
the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +- hosts: mgmt + gather_facts: "no" + vars_files: + - vars/vars.yaml + environment: + USE_GKE_GCLOUD_AUTH_PLUGIN: True + tasks: + - name: Download the Google Cloud SDK package repository signing key + get_url: + url: https://packages.cloud.google.com/apt/doc/apt-key.gpg + dest: /usr/share/keyrings/cloud.google.gpg + force: yes + become: true + become_user: root + - name: Add Google Cloud SDK package repository source + apt_repository: + filename: google-cloud-sdk + repo: "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" + state: present + update_cache: yes + become: true + become_user: root + - name: Install dependencies + apt: + pkg: + - google-cloud-sdk-gke-gcloud-auth-plugin + - kubectl + state: present + become: true + become_user: root + - name: Enable bash completion for kubectl + shell: + cmd: kubectl completion bash > /etc/bash_completion.d/kubectl + creates: /etc/bash_completion.d/kubectl + become: true + become_user: root + - name: Get cluster credentials + shell: > + gcloud container clusters get-credentials {{ cluster }} + --region {{ region }} + --project {{ project_id }} + --internal-ip + - name: Render templates + template: + src: ../bundle/{{ item }}/kustomization.yaml.j2 + dest: ../bundle/{{ item }}/kustomization.yaml + delegate_to: localhost + with_items: + - monitoring + - locust + - name: Remove bundle locally + local_action: + module: file + path: 
../bundle.tar.gz + state: absent + - name: Archive bundle locally + archive: + path: ../bundle + dest: ../bundle.tar.gz + delegate_to: localhost + - name: Unarchive bundle remotely + unarchive: + src: ../bundle.tar.gz + dest: ~/ + - name: Build locust image + shell: > + gcloud builds submit --tag {{ region }}-docker.pkg.dev/{{ project_id }}/registry/load-test:latest \ + --project {{ project_id }} . + args: + chdir: ~/bundle/locust/image + - name: Enable scraping of kubelet and cAdvisor metrics + shell: > + kubectl patch operatorconfig config + -n gmp-public + --type=merge + -p '{"collection":{"kubeletScraping":{"interval": "30s"}}}' + - name: Deploy monitoring tooling + shell: > + kubectl apply -k . + args: + chdir: ~/bundle/monitoring + - name: Deploy app + shell: > + kubectl apply -k . + args: + chdir: ~/bundle/app + - name: Get forwarding rule name + shell: > + while true; do + forwarding_rule_name=$(kubectl get ingress -n sample -o=jsonpath='{.items[0].metadata.annotations.ingress\.kubernetes\.io\/forwarding-rule}') + if [ -n "$forwarding_rule_name" ]; then + echo $forwarding_rule_name + break + fi + sleep 10 + done + register: forwarding_rule_name_output + - name: Set fact forwarding_url_name + set_fact: + forwarding_rule_name: "{{ forwarding_rule_name_output.stdout }}" + - name: Render template (HPA) + template: + src: ../bundle/app/hpa.yaml.j2 + dest: ~/bundle/app/hpa.yaml + - name: Apply HPA manifest + shell: > + kubectl apply -f hpa.yaml + args: + chdir: ~/bundle/app + - name: Deploy locust + shell: > + kubectl apply -k . + args: + chdir: ~/bundle/locust diff --git a/blueprints/gke/autopilot/bundle/app/hpa.yaml.j2 b/blueprints/gke/autopilot/bundle/app/hpa.yaml.j2 new file mode 100644 index 00000000..54a12b4b --- /dev/null +++ b/blueprints/gke/autopilot/bundle/app/hpa.yaml.j2 @@ -0,0 +1,37 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: nginx + namespace: sample +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: nginx + minReplicas: 1 + maxReplicas: 50 + metrics: + - type: External + external: + metric: + name: loadbalancing.googleapis.com|https|request_count + selector: + matchLabels: + resource.labels.forwarding_rule_name: {{ forwarding_rule_name }} + target: + type: AverageValue + averageValue: 5 \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/app/ingress.yaml b/blueprints/gke/autopilot/bundle/app/ingress.yaml new file mode 100644 index 00000000..05910240 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/app/ingress.yaml @@ -0,0 +1,42 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: cloud.google.com/v1 +kind: BackendConfig +metadata: + name: backendconfig + namespace: sample +spec: + healthCheck: + requestPath: / + port: 80 + type: HTTP + logging: + enable: true + sampleRate: 0.5 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + kubernetes.io/ingress.global-static-ip-name: "app" + kubernetes.io/ingress.allow-http: "true" + name: ingress + namespace: sample +spec: + defaultBackend: + service: + name: nginx + port: + name: web \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/app/kustomization.yaml b/blueprints/gke/autopilot/bundle/app/kustomization.yaml new file mode 100644 index 00000000..cb074c01 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/app/kustomization.yaml @@ -0,0 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +resources: + - namespace.yaml + - nginx.yaml + - ingress.yaml diff --git a/blueprints/gke/autopilot/bundle/app/namespace.yaml b/blueprints/gke/autopilot/bundle/app/namespace.yaml new file mode 100644 index 00000000..60510a40 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/app/namespace.yaml @@ -0,0 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: Namespace +metadata: + name: sample \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/app/nginx.yaml b/blueprints/gke/autopilot/bundle/app/nginx.yaml new file mode 100644 index 00000000..48a9d2c1 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/app/nginx.yaml @@ -0,0 +1,128 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-config + namespace: sample +data: + nginx.conf: | + events {} + http { + server { + listen 80; + root /var/www/html; + location / { + return 200 'Hello World!'; + } + } + server { + listen 8080; + location /stub_status { + stub_status on; + } + } + } +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx + namespace: sample +spec: + replicas: 1 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx:latest + ports: + - containerPort: 80 + name: web + - containerPort: 8080 + name: status + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + readinessProbe: + httpGet: + path: /stub_status + port: 8080 + initialDelaySeconds: 2 + periodSeconds: 2 + failureThreshold: 1 + resources: + requests: + cpu: 10m + memory: 10Mi + limits: + memory: 10Mi + - name: nginx-prometheus-exporter + image: nginx/nginx-prometheus-exporter:0.10.0 + ports: + - containerPort: 9113 + name: metrics + env: + - name: SCRAPE_URI + value: http://localhost:8080/stub_status + resources: + requests: + cpu: 5m + memory: 5Mi + limits: + memory: 5Mi + volumes: + - name: nginx-config + configMap: + name: nginx-config +--- +apiVersion: v1 +kind: Service +metadata: + name: nginx + namespace: sample + annotations: + cloud.google.com/neg: '{"ingress": true}' + cloud.google.com/app-protocols: '{"web":"HTTP"}' + cloud.google.com/backend-config: '{"default": "backendconfig"}' + labels: + app: nginx +spec: + ports: + - name: web + port: 80 + protocol: TCP + selector: + app: nginx +--- +apiVersion: monitoring.googleapis.com/v1 +kind: ClusterPodMonitoring +metadata: + name: nginx + namespace: sample +spec: + selector: + matchLabels: + app: nginx + endpoints: + - port: metrics + interval: 30s \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/locust/image/Dockerfile 
b/blueprints/gke/autopilot/bundle/locust/image/Dockerfile new file mode 100644 index 00000000..85e7a177 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/locust/image/Dockerfile @@ -0,0 +1,21 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM locustio/locust:latest + +ADD locust-files /home/locust/locust-files + +ADD run.sh /home/locust/run.sh + +ENTRYPOINT ["/home/locust/run.sh"] \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/locust/image/locust-files/test.py b/blueprints/gke/autopilot/bundle/locust/image/locust-files/test.py new file mode 100644 index 00000000..def61168 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/locust/image/locust-files/test.py @@ -0,0 +1,65 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import logging
+import os
+from locust import HttpUser, LoadTestShape, task, between
+
+
+class TestUser(HttpUser):
+  """Simulated user that issues GET / against the target host."""
+
+  # Target base URL; overridable through the URL environment variable.
+  host = os.getenv("URL", "http://nginx.sample.svc.cluster.local")
+
+  # Wait-time bounds come from MIN_WAIT_TIME / MAX_WAIT_TIME and default to
+  # 1 and 2. The default must be passed to os.getenv, not to int(), where a
+  # second positional argument is interpreted as the numeric base.
+  wait_time = between(int(os.getenv('MIN_WAIT_TIME', 1)),
+                      int(os.getenv('MAX_WAIT_TIME', 2)))
+
+  @task
+  def home(self):
+    """Request "/" and mark the response successful on HTTP 200."""
+    with self.client.get("/", catch_response=True) as response:
+      if response.status_code == 200:
+        response.success()
+      else:
+        logging.info('Response code is ' + str(response.status_code))
+
+
+class CustomLoadShape(LoadTestShape):
+  """Step-wise load shape: ramps users up stage by stage, then back down.
+
+  Stage parameters are read from NUM_STAGES, STAGE_DURATION, SPAWN_RATE and
+  NEW_USERS_PER_STAGE when the class body is executed.
+  """
+
+  stages = []
+
+  num_stages = int(os.getenv('NUM_STAGES', 20))
+  stage_duration = int(os.getenv('STAGE_DURATION', 60))
+  spawn_rate = int(os.getenv('SPAWN_RATE', 1))
+  new_users_per_stage = int(os.getenv('NEW_USERS_PER_STAGE', 10))
+
+  # Ramp up: stage i ends at stage_duration * i with new_users_per_stage * i
+  # users.
+  for i in range(1, num_stages + 1):
+    stages.append({
+        'duration': stage_duration * i,
+        'users': new_users_per_stage * i,
+        'spawn_rate': spawn_rate
+    })
+
+  # Ramp down: mirror of the ramp up, stopping one step above zero users.
+  for i in range(1, num_stages):
+    stages.append({
+        'duration': stage_duration * (num_stages + i),
+        'users': new_users_per_stage * (num_stages - i),
+        'spawn_rate': spawn_rate
+    })
+
+  def tick(self):
+    """Return (users, spawn_rate) for the current stage.
+
+    The current stage is the first one whose end time is still ahead of the
+    elapsed run time; None is returned once every stage has elapsed.
+    """
+    run_time = self.get_run_time()
+    for stage in self.stages:
+      if run_time < stage['duration']:
+        tick_data = (stage['users'], stage['spawn_rate'])
+        return tick_data
+    return None
diff --git a/blueprints/gke/autopilot/bundle/locust/image/run.sh b/blueprints/gke/autopilot/bundle/locust/image/run.sh
new file mode 100755
index 00000000..36c3ee18
--- /dev/null
+++ b/blueprints/gke/autopilot/bundle/locust/image/run.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LOCUS_OPTS="-f /home/locust/locust-files" +LOCUST_MODE=${LOCUST_MODE:-standalone} + +if [[ "$LOCUST_MODE" = "master" ]]; then + LOCUS_OPTS="$LOCUS_OPTS --master" +elif [[ "$LOCUST_MODE" = "worker" ]]; then + LOCUS_OPTS="$LOCUS_OPTS --worker --master-host=$LOCUST_MASTER" +fi + +locust $LOCUS_OPTS \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/locust/ingress.yaml b/blueprints/gke/autopilot/bundle/locust/ingress.yaml new file mode 100644 index 00000000..8db7a1a4 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/locust/ingress.yaml @@ -0,0 +1,42 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: cloud.google.com/v1 +kind: BackendConfig +metadata: + name: backendconfig + namespace: locust +spec: + healthCheck: + requestPath: / + port: 8089 + type: HTTP + logging: + enable: true + sampleRate: 0.5 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: ingress + namespace: locust + annotations: + kubernetes.io/ingress.global-static-ip-name: "locust" + kubernetes.io/ingress.allow-http: "true" +spec: + defaultBackend: + service: + name: locust-master-web + port: + name: loc-master-web \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/locust/kustomization.yaml.j2 b/blueprints/gke/autopilot/bundle/locust/kustomization.yaml.j2 new file mode 100755 index 00000000..d1bafc55 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/locust/kustomization.yaml.j2 @@ -0,0 +1,66 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+resources:
+  - namespace.yaml
+  - master.yaml
+  - workers.yaml
+  - ingress.yaml
+# Each patch injects the target application URL into the matching Locust
+# container; the container name in the patch must equal the container name
+# in the patched Deployment for the entries to be merged.
+patches:
+  - target:
+      group: apps
+      version: v1
+      kind: Deployment
+      name: locust-master
+      namespace: locust
+    patch: |-
+      apiVersion: apps/v1
+      kind: Deployment
+      metadata:
+        name: locust-master
+        namespace: locust
+      spec:
+        template:
+          spec:
+            containers:
+            - name: locust-master
+              image: load-test-image
+              env:
+              - name: URL
+                value: {{ app_url }}
+  - target:
+      group: apps
+      version: v1
+      kind: Deployment
+      name: locust-worker
+      namespace: locust
+    patch: |-
+      apiVersion: apps/v1
+      kind: Deployment
+      metadata:
+        name: locust-worker
+        namespace: locust
+      spec:
+        template:
+          spec:
+            containers:
+            - name: locust-worker
+              image: load-test-image
+              env:
+              - name: URL
+                value: {{ app_url }}
+# Rewrites the placeholder image name to the Artifact Registry image.
+images:
+  - name: load-test-image
+    newName: {{ region }}-docker.pkg.dev/{{ project_id}}/registry/load-test
+    newTag: latest
\ No newline at end of file
diff --git a/blueprints/gke/autopilot/bundle/locust/master.yaml b/blueprints/gke/autopilot/bundle/locust/master.yaml
new file mode 100644
index 00000000..89bc3c02
--- /dev/null
+++ b/blueprints/gke/autopilot/bundle/locust/master.yaml
@@ -0,0 +1,128 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +apiVersion: "apps/v1" +kind: "Deployment" +metadata: + name: locust-master + namespace: locust + labels: + name: locust-master +spec: + replicas: 1 + selector: + matchLabels: + app: locust-master + template: + metadata: + labels: + app: locust-master + spec: + tolerations: + - key: group + operator: Equal + value: "locust" + effect: NoSchedule + nodeSelector: + group: "locust" + containers: + - name: locust-master + image: load-test-image + env: + - name: LOCUST_MODE + value: master + ports: + - name: loc-master-web + containerPort: 8089 + protocol: TCP + - name: loc-master-p1 + containerPort: 5557 + protocol: TCP + - name: loc-master-p2 + containerPort: 5558 + protocol: TCP + resources: + requests: + cpu: 50m + memory: 50Mi + limits: + memory: 50Mi + - name: locust-prometheus-exporter + image: containersol/locust_exporter + ports: + - name: metrics + containerPort: 9646 + resources: + requests: + cpu: 5m + memory: 5Mi + limits: + memory: 5Mi +--- +kind: Service +apiVersion: v1 +metadata: + name: locust-master + namespace: locust + labels: + app: locust-master +spec: + ports: + - port: 5557 + targetPort: loc-master-p1 + protocol: TCP + name: loc-master-p1 + - port: 5558 + targetPort: loc-master-p2 + protocol: TCP + name: loc-master-p2 + - port: 9646 + targetPort: metrics + protocol: TCP + name: metrics + selector: + app: locust-master +--- +kind: Service +apiVersion: v1 +metadata: + name: locust-master-web + namespace: locust + annotations: + cloud.google.com/neg: '{"ingress": true}' + cloud.google.com/app-protocols: '{"loc-master-web":"HTTP"}' + cloud.google.com/backend-config: '{"default": "backendconfig"}' + labels: + app: locust-master +spec: + ports: + - port: 8089 + targetPort: loc-master-web + protocol: TCP + name: loc-master-web + selector: + app: locust-master +--- +apiVersion: monitoring.googleapis.com/v1 +kind: ClusterPodMonitoring +metadata: + name: locust-master + namespace: locust +spec: + selector: + matchLabels: + app: locust-master + endpoints: 
+ - port: metrics + interval: 30s \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/locust/namespace.yaml b/blueprints/gke/autopilot/bundle/locust/namespace.yaml new file mode 100644 index 00000000..16aa6e18 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/locust/namespace.yaml @@ -0,0 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: Namespace +metadata: + name: locust \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/locust/workers.yaml b/blueprints/gke/autopilot/bundle/locust/workers.yaml new file mode 100644 index 00000000..7d6b41fe --- /dev/null +++ b/blueprints/gke/autopilot/bundle/locust/workers.yaml @@ -0,0 +1,51 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Locust worker pool; each worker connects to the master through the
+# LOCUST_MASTER host name.
+apiVersion: "apps/v1"
+kind: "Deployment"
+metadata:
+  name: locust-worker
+  namespace: locust
+  labels:
+    name: locust-worker
+spec:
+  replicas: 5
+  selector:
+    matchLabels:
+      app: locust-worker
+  template:
+    metadata:
+      labels:
+        app: locust-worker
+    spec:
+      tolerations:
+      - key: group
+        operator: Equal
+        value: "locust"
+        effect: NoSchedule
+      nodeSelector:
+        group: "locust"
+      containers:
+      - name: locust-worker
+        image: load-test-image
+        env:
+        - name: LOCUST_MODE
+          value: worker
+        - name: LOCUST_MASTER
+          value: locust-master
+        # requests/limits are only valid nested under "resources"
+        resources:
+          requests:
+            cpu: 20m
+            memory: 50Mi
+          limits:
+            memory: 50Mi
\ No newline at end of file
diff --git a/blueprints/gke/autopilot/bundle/monitoring/custom-stackdriver-metrics-adapter.yaml b/blueprints/gke/autopilot/bundle/monitoring/custom-stackdriver-metrics-adapter.yaml
new file mode 100644
index 00000000..8e159213
--- /dev/null
+++ b/blueprints/gke/autopilot/bundle/monitoring/custom-stackdriver-metrics-adapter.yaml
@@ -0,0 +1,184 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: custom-metrics-stackdriver-adapter + namespace: monitoring +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: custom-metrics:system:auth-delegator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:auth-delegator +subjects: +- kind: ServiceAccount + name: custom-metrics-stackdriver-adapter + namespace: monitoring +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: custom-metrics-auth-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: +- kind: ServiceAccount + name: custom-metrics-stackdriver-adapter + namespace: monitoring +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: custom-metrics-resource-reader +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: view +subjects: +- kind: ServiceAccount + name: custom-metrics-stackdriver-adapter + namespace: monitoring +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: custom-metrics-stackdriver-adapter + namespace: monitoring + labels: + run: custom-metrics-stackdriver-adapter + k8s-app: custom-metrics-stackdriver-adapter +spec: + replicas: 1 + selector: + matchLabels: + run: custom-metrics-stackdriver-adapter + k8s-app: custom-metrics-stackdriver-adapter + template: + metadata: + labels: + run: custom-metrics-stackdriver-adapter + k8s-app: custom-metrics-stackdriver-adapter + kubernetes.io/cluster-service: "true" + spec: + serviceAccountName: custom-metrics-stackdriver-adapter + containers: + - image: gcr.io/gke-release/custom-metrics-stackdriver-adapter:v0.13.1-gke.0 + imagePullPolicy: Always + name: pod-custom-metrics-stackdriver-adapter + command: + - /adapter + - --use-new-resource-model=false + resources: + limits: + cpu: 100m + memory: 150Mi + requests: + memory: 150Mi +--- 
+# Service fronting the custom-metrics-stackdriver-adapter pods on port 443.
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    run: custom-metrics-stackdriver-adapter
+    k8s-app: custom-metrics-stackdriver-adapter
+    kubernetes.io/cluster-service: 'true'
+    kubernetes.io/name: Adapter
+  name: custom-metrics-stackdriver-adapter
+  namespace: monitoring
+spec:
+  ports:
+  - port: 443
+    protocol: TCP
+    targetPort: 443
+  selector:
+    run: custom-metrics-stackdriver-adapter
+    k8s-app: custom-metrics-stackdriver-adapter
+  type: ClusterIP
+---
+# The three APIService objects below register the adapter Service as the
+# backend for the custom.metrics.k8s.io and external.metrics.k8s.io groups.
+apiVersion: apiregistration.k8s.io/v1
+kind: APIService
+metadata:
+  name: v1beta1.custom.metrics.k8s.io
+spec:
+  insecureSkipTLSVerify: true
+  group: custom.metrics.k8s.io
+  groupPriorityMinimum: 100
+  versionPriority: 100
+  service:
+    name: custom-metrics-stackdriver-adapter
+    namespace: monitoring
+  version: v1beta1
+---
+apiVersion: apiregistration.k8s.io/v1
+kind: APIService
+metadata:
+  name: v1beta2.custom.metrics.k8s.io
+spec:
+  insecureSkipTLSVerify: true
+  group: custom.metrics.k8s.io
+  groupPriorityMinimum: 100
+  versionPriority: 200
+  service:
+    name: custom-metrics-stackdriver-adapter
+    namespace: monitoring
+  version: v1beta2
+---
+apiVersion: apiregistration.k8s.io/v1
+kind: APIService
+metadata:
+  name: v1beta1.external.metrics.k8s.io
+spec:
+  insecureSkipTLSVerify: true
+  group: external.metrics.k8s.io
+  groupPriorityMinimum: 100
+  versionPriority: 100
+  service:
+    name: custom-metrics-stackdriver-adapter
+    namespace: monitoring
+  version: v1beta1
+---
+# Read access to every resource in the external.metrics.k8s.io API group.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: external-metrics-reader
+rules:
+- apiGroups:
+  - "external.metrics.k8s.io"
+  resources:
+  - "*"
+  verbs:
+  - list
+  - get
+  - watch
+---
+# Grants the role above to the horizontal-pod-autoscaler service account.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: external-metrics-reader
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: external-metrics-reader
+subjects:
+- kind: ServiceAccount
+  name: horizontal-pod-autoscaler
+  namespace: kube-system
\ No newline at end of
file diff --git a/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-global.json b/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-global.json new file mode 100644 index 00000000..13d3eb4c --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-global.json @@ -0,0 +1 @@ +{"annotations":{"list":[{"builtIn":1,"datasource":{"type":"datasource","uid":"grafana"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"description":"This is a modern 'Global View' dashboard for your Kubernetes cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes","editable":true,"fiscalYearStartMonth":0,"gnetId":15757,"graphTooltip":1,"id":11,"iteration":1677857459220,"links":[],"liveNow":false,"panels":[{"collapsed":false,"datasource":{"type":"prometheus","uid":"prometheus"},"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":67,"panels":[],"title":"Overview","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"continuous-GrYlRd"},"mappings":[],"max":1,"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":0,"y":1},"id":77,"options":{"displayMode":"lcd","minVizHeight":10,"minVizWidth":0,"orientation":"horizontal","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(kube_pod_container_resource_requests{unit=\"core\"}) / 
sum(machine_cpu_cores)","hide":false,"interval":"","legendFormat":"Requests","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(kube_pod_container_resource_limits{unit=\"core\"}) / sum(machine_cpu_cores)","hide":false,"interval":"","legendFormat":"Limits","range":true,"refId":"C"}],"title":"Global CPU Usage","type":"bargauge"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"continuous-GrYlRd"},"decimals":2,"mappings":[],"max":1,"min":0,"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":6,"y":1},"id":78,"options":{"displayMode":"lcd","minVizHeight":10,"minVizWidth":0,"orientation":"horizontal","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true,"text":{}},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(kube_pod_container_resource_requests{unit=\"byte\"}) / sum(machine_memory_bytes)","hide":false,"interval":"","legendFormat":"Requests","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(kube_pod_container_resource_limits{unit=\"byte\"}) / sum(machine_memory_bytes)","hide":false,"interval":"","legendFormat":"Limits","range":true,"refId":"C"}],"title":"Global RAM 
Usage","type":"bargauge"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"blue","value":null}]}},"overrides":[]},"gridPos":{"h":4,"w":2,"x":12,"y":1},"id":59,"options":{"colorMode":"value","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["last"],"fields":"","values":false},"text":{},"textMode":"value"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(kube_namespace_created)","interval":"","legendFormat":"","refId":"A"}],"title":"Namespaces","type":"stat"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"NB","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":12,"w":10,"x":14,"y":1},"id":52,"options":{"legend":{"calcs":["min","max","mean"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(kube_namespace_labels)","interval":"","legendFormat":"Namespaces","refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_status_running)","interval":"","legendFormat":"Running 
Containers","refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_status_phase{phase='Running'})","interval":"","legendFormat":"Running Pods","refId":"O"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_service_info)","interval":"","legendFormat":"Services","refId":"C"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_endpoint_info)","interval":"","legendFormat":"Endpoints","refId":"D"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_ingress_info)","interval":"","legendFormat":"Ingresses","refId":"E"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_deployment_labels)","interval":"","legendFormat":"Deployments","refId":"F"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_statefulset_labels)","interval":"","legendFormat":"Statefulsets","refId":"G"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_daemonset_labels)","interval":"","legendFormat":"Daemonsets","refId":"H"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_persistentvolumeclaim_info)","interval":"","legendFormat":"Persistent Volume Claims","refId":"I"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_hpa_labels)","interval":"","legendFormat":"Horizontal Pod Autoscalers","refId":"J"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_configmap_info)","interval":"","legendFormat":"Configmaps","refId":"K"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_secret_info)","interval":"","legendFormat":"Secrets","refId":"L"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_networkpolicy_labels)","interval":"","legendFormat":"Network 
Policies","refId":"M"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"count(up{job=\"node-exporter\"})","hide":false,"interval":"","legendFormat":"Nodes","refId":"N"}],"title":"Kubernetes Resource Count","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"blue","value":null}]}},"overrides":[]},"gridPos":{"h":4,"w":2,"x":12,"y":5},"id":62,"options":{"colorMode":"value","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["last"],"fields":"","values":false},"text":{},"textMode":"value"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_status_phase{phase='Running'})","interval":"","legendFormat":"","refId":"A"}],"title":"Running Pods","type":"stat"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"rgb(255, 255, 
255)","value":null}]},"unit":"none"},"overrides":[]},"gridPos":{"h":4,"w":6,"x":0,"y":9},"id":37,"options":{"colorMode":"none","graphMode":"none","justifyMode":"center","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(kube_pod_container_resource_requests{unit=\"core\"})","hide":false,"interval":"","legendFormat":"Requests","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(kube_pod_container_resource_limits{unit=\"core\"})","hide":false,"interval":"","legendFormat":"Limits","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(machine_cpu_cores)","hide":false,"interval":"","legendFormat":"Total","range":true,"refId":"D"}],"title":"CPU Usage","type":"stat"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"rgb(255, 255, 
255)","value":null}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":4,"w":8,"x":6,"y":9},"id":39,"options":{"colorMode":"none","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(kube_pod_container_resource_requests{unit=\"byte\"})","hide":false,"interval":"","legendFormat":"Requests","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(kube_pod_container_resource_limits{unit=\"byte\"})","hide":false,"interval":"","legendFormat":"Limits","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(machine_memory_bytes)","hide":false,"interval":"","legendFormat":"Total","range":true,"refId":"D"}],"title":"RAM Usage","type":"stat"},{"collapsed":false,"datasource":{"type":"prometheus","uid":"prometheus"},"gridPos":{"h":1,"w":24,"x":0,"y":13},"id":71,"panels":[],"title":"Resources","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"CPU 
Cores","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"decimals":2,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":14},"id":46,"options":{"legend":{"calcs":["min","max","mean"],"displayMode":"table","placement":"right","showLegend":true,"sortBy":"Max","sortDesc":true},"tooltip":{"mode":"single","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":false,"expr":"sum(rate(container_cpu_usage_seconds_total{image!=\"\"}[$__rate_interval])) by (namespace)","interval":"$resolution","legendFormat":"{{ namespace }}","refId":"A"}],"title":"CPU Utilization by 
namespace","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"MEMORY","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":14},"id":50,"options":{"legend":{"calcs":["min","max","mean"],"displayMode":"table","placement":"right","showLegend":true,"sortBy":"Max","sortDesc":true},"tooltip":{"mode":"single","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(container_memory_working_set_bytes{image!=\"\"}) by (namespace)","interval":"$resolution","legendFormat":"{{ namespace }}","refId":"A"}],"title":"Memory Utilization by 
namespace","type":"timeseries"},{"collapsed":false,"datasource":{"type":"prometheus","uid":"prometheus"},"gridPos":{"h":1,"w":24,"x":0,"y":22},"id":69,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"BANDWIDTH","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":23},"id":79,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":true,"expr":"sum(rate(container_network_receive_bytes_total[$__rate_interval])) by (namespace)","interval":"$resolution","legendFormat":"Received bytes in {{ namespace }}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","expr":"- sum(rate(container_network_transmit_bytes_total[$__rate_interval])) by (namespace)","hide":false,"interval":"$resolution","legendFormat":"Transmitted bytes in {{ namespace }}","range":true,"refId":"B"}],"title":"Network Received by 
namespace","type":"timeseries"}],"refresh":"30s","schemaVersion":34,"style":"dark","tags":["Kubernetes","Prometheus"],"templating":{"list":[{"current":{"selected":false,"text":"Prometheus","value":"Prometheus"},"hide":0,"includeAll":false,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"current":{"selected":true,"text":"30s","value":"30s"},"hide":0,"includeAll":false,"multi":false,"name":"resolution","options":[{"selected":false,"text":"1s","value":"1s"},{"selected":false,"text":"15s","value":"15s"},{"selected":true,"text":"30s","value":"30s"},{"selected":false,"text":"1m","value":"1m"},{"selected":false,"text":"3m","value":"3m"},{"selected":false,"text":"5m","value":"5m"}],"query":"1s, 15s, 30s, 1m, 3m, 5m","queryValue":"","skipUrlSync":false,"type":"custom"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"k8s / view / global","uid":"k8s_view_global","version":3,"weekStart":""} \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-namespaces.json b/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-namespaces.json new file mode 100644 index 00000000..815e9e1a --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-namespaces.json @@ -0,0 +1 @@ +{"annotations":{"list":[{"builtIn":1,"datasource":{"type":"datasource","uid":"grafana"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"description":"This is a modern 'Namespaces View' dashboard for your Kubernetes cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. 
GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes","editable":true,"fiscalYearStartMonth":0,"gnetId":15758,"graphTooltip":1,"id":10,"iteration":1677857471900,"links":[],"liveNow":false,"panels":[{"collapsed":false,"datasource":{"type":"datasource","uid":"grafana"},"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":38,"panels":[],"title":"Overview","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"decimals":2,"mappings":[],"max":1,"min":0,"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"orange","value":50},{"color":"red","value":70}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":7,"w":6,"x":0,"y":1},"id":46,"options":{"orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true,"text":{}},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\"}[$__rate_interval])) / sum(machine_cpu_cores)","instant":true,"interval":"","legendFormat":"","range":false,"refId":"A"}],"title":"Namespace(s) usage on total cluster CPU in %","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"decimals":2,"mappings":[],"max":1,"min":0,"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"orange","value":50},{"color":"red","value":70}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":7,"w":6,"x":6,"y":1},"id":48,"options":{"orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true,"text":{}},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(container_memory_working_set_bytes{namespace=~\"$namespace\"}) / 
sum(machine_memory_bytes)","interval":"","legendFormat":"","refId":"A"}],"title":"Namespace(s) usage on total cluster RAM in %","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":11,"w":12,"x":12,"y":1},"id":32,"options":{"legend":{"calcs":["min","max","mean"],"displayMode":"table","placement":"right","showLegend":true,"sortBy":"Max","sortDesc":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_status_running{namespace=~\"$namespace\"})","interval":"","legendFormat":"Running 
Pods","refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_service_info{namespace=~\"$namespace\"})","interval":"","legendFormat":"Services","refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_ingress_info{namespace=~\"$namespace\"})","interval":"","legendFormat":"Ingresses","refId":"C"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_deployment_labels{namespace=~\"$namespace\"})","interval":"","legendFormat":"Deployments","refId":"D"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_statefulset_labels{namespace=~\"$namespace\"})","interval":"","legendFormat":"Statefulsets","refId":"E"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_daemonset_labels{namespace=~\"$namespace\"})","interval":"","legendFormat":"Daemonsets","refId":"F"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"})","interval":"","legendFormat":"Persistent Volume Claims","refId":"G"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_hpa_labels{namespace=~\"$namespace\"})","interval":"","legendFormat":"Horizontal Pod Autoscalers","refId":"H"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_configmap_info{namespace=~\"$namespace\"})","interval":"","legendFormat":"Configmaps","refId":"I"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_secret_info{namespace=~\"$namespace\"})","interval":"","legendFormat":"Secrets","refId":"J"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_networkpolicy_labels{namespace=~\"$namespace\"})","interval":"","legendFormat":"Network Policies","refId":"K"}],"title":"Kubernetes Resource 
Count","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"rgb(255, 255, 255)","value":null}]},"unit":"none"},"overrides":[]},"gridPos":{"h":4,"w":6,"x":0,"y":8},"id":62,"options":{"colorMode":"none","graphMode":"none","justifyMode":"center","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":true,"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\"}[$__rate_interval]))","interval":"","legendFormat":"Real","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","expr":"sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", unit=\"core\"})","hide":false,"legendFormat":"Requests","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","expr":"sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", unit=\"core\"})","hide":false,"legendFormat":"Limits","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","expr":"sum(machine_cpu_cores)","hide":false,"legendFormat":"Cluster Total","range":true,"refId":"D"}],"title":"Namespace(s) CPU Usage in cores","type":"stat"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"rgb(255, 255, 
255)","value":null}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":4,"w":6,"x":6,"y":8},"id":64,"options":{"colorMode":"none","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":true,"expr":"sum(container_memory_working_set_bytes{namespace=~\"$namespace\"})","interval":"","legendFormat":"Real","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","expr":"sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", unit=\"byte\"})","hide":false,"legendFormat":"Requests","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","expr":"sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", unit=\"byte\"})","hide":false,"legendFormat":"Limits","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","expr":"sum(machine_memory_bytes)","hide":false,"legendFormat":"Cluster Total","range":true,"refId":"D"}],"title":"Namespace(s) RAM Usage in bytes","type":"stat"},{"collapsed":false,"datasource":{"type":"datasource","uid":"grafana"},"gridPos":{"h":1,"w":24,"x":0,"y":12},"id":40,"panels":[],"title":"Resources","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"CPU 
Cores","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":13},"id":29,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\"}[$__rate_interval])) by (pod)","interval":"$resolution","legendFormat":"{{ pod }}","refId":"A"}],"title":"CPU usage by 
Pod","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":13},"id":30,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\"}) by (pod)","interval":"$resolution","legendFormat":"{{ pod }}","refId":"A"}],"title":"Memory usage by Pod","type":"timeseries"},{"collapsed":false,"datasource":{"type":"datasource","uid":"grafana"},"gridPos":{"h":1,"w":24,"x":0,"y":21},"id":44,"panels":[],"title":"Kubernetes 
Resources","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"decimals":0,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":22},"id":5,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_status_ready{namespace=~\"$namespace\"})","interval":"","legendFormat":"Ready","refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_status_running{namespace=~\"$namespace\"})","interval":"","legendFormat":"Running","refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_status_waiting{namespace=~\"$namespace\"})","interval":"","legendFormat":"Waiting","refId":"C"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_status_restarts_total{namespace=~\"$namespace\"})","interval":"","legendFormat":"Restarts Total","refId":"D"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_status_terminated{namespace=~\"$namespace\"})","interval":"","legendFormat":"Terminated","refId":"E"}],"title":"Nb of pods by 
state","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"decimals":0,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":22},"id":2,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_info{namespace=~\"$namespace\"}) by (pod)","interval":"","legendFormat":"{{ pod }}","refId":"A"}],"title":"Nb of containers by 
pod","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":30},"id":7,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(kube_deployment_status_replicas_available{namespace=~\"$namespace\"}) by (deployment)","interval":"","legendFormat":"{{ deployment }}","refId":"A"}],"title":"Replicas available by 
deployment","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":30},"id":8,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\"}) by (deployment)","interval":"","legendFormat":"{{ deployment }}","refId":"A"}],"title":"Replicas unavailable by deployment","type":"timeseries"},{"collapsed":false,"datasource":{"type":"datasource","uid":"grafana"},"gridPos":{"h":1,"w":24,"x":0,"y":38},"id":42,"panels":[],"title":"Kubernetes 
Storage","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"decimals":2,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":39},"id":12,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\"}) by (persistentvolumeclaim)","interval":"","legendFormat":"{{ persistentvolumeclaim }}","refId":"A"}],"title":"Persistent Volumes - 
Capacity","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"decimals":2,"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":39},"id":27,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(kubelet_volume_stats_inodes_used{namespace=~\"$namespace\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_inodes{namespace=~\"$namespace\"}) by (persistentvolumeclaim) * 100","interval":"","legendFormat":"{{ persistentvolumeclaim }}","refId":"A"}],"title":"Persistent Volumes - Inodes","type":"timeseries"}],"refresh":"30s","schemaVersion":34,"style":"dark","tags":["Kubernetes","Prometheus"],"templating":{"list":[{"current":{"selected":false,"text":"Prometheus","value":"Prometheus"},"hide":0,"includeAll":false,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"current":{"selected":true,"text":["kube-system"],"value":["kube-system"]},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(kube_pod_info, namespace)","hide":0,"includeAll":true,"multi":true,"name":"namespace","options":[],"query":{"query":"label_values(kube_pod_info, 
namespace)","refId":"StandardVariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false},{"current":{"selected":true,"text":"30s","value":"30s"},"hide":0,"includeAll":false,"multi":false,"name":"resolution","options":[{"selected":false,"text":"1s","value":"1s"},{"selected":false,"text":"15s","value":"15s"},{"selected":true,"text":"30s","value":"30s"},{"selected":false,"text":"1m","value":"1m"},{"selected":false,"text":"3m","value":"3m"},{"selected":false,"text":"5m","value":"5m"}],"query":"1s, 15s, 30s, 1m, 3m, 5m","queryValue":"","skipUrlSync":false,"type":"custom"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"k8s / view / namespaces","uid":"k8s_view_ns","version":1,"weekStart":""} \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-nodes.json b/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-nodes.json new file mode 100644 index 00000000..9c8fee93 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-nodes.json @@ -0,0 +1 @@ +{"annotations":{"list":[{"builtIn":1,"datasource":{"type":"datasource","uid":"grafana"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"description":"This is a modern 'Nodes View' dashboard for your Kubernetes cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. 
GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes","editable":true,"fiscalYearStartMonth":0,"gnetId":15759,"graphTooltip":1,"id":9,"iteration":1677857491220,"links":[],"liveNow":false,"panels":[{"collapsed":false,"datasource":{"type":"prometheus","uid":"prometheus"},"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":40,"panels":[],"title":"Overview","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"rgb(255, 255, 255)","value":null}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":3,"x":0,"y":1},"id":11,"options":{"colorMode":"none","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(machine_cpu_cores{node=\"$node\"})","interval":"$resolution","legendFormat":"","refId":"A"}],"title":"CPU Total","type":"stat"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"rgb(255, 255, 255)","value":null}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":5,"x":3,"y":1},"id":17,"options":{"colorMode":"none","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"text":{},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"machine_memory_bytes{node=\"$node\"}","instant":false,"interval":"","legendFormat":"","refId":"A"}],"title":"RAM 
Total","type":"stat"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"blue","value":null}]}},"overrides":[]},"gridPos":{"h":8,"w":4,"x":8,"y":1},"id":24,"options":{"colorMode":"value","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["last"],"fields":"","values":false},"text":{},"textMode":"value"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(kube_pod_info{node=\"$node\"})","interval":"","legendFormat":"","refId":"A"}],"title":"Pods on node","type":"stat"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"align":"auto","displayMode":"auto"},"links":[{"targetBlank":true,"title":"Pod details","url":"/d/k8s_views_pods/kubernetes-views-pods?${datasource:queryparam}&var-namespace=${__data.fields.namespace}&var-pod=${__data.fields.pod}&${resolution:queryparam}&${__url_time_range}"}],"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[{"matcher":{"id":"byName","options":"pod"},"properties":[{"id":"custom.width","value":416}]},{"matcher":{"id":"byName","options":"priority_class"},"properties":[{"id":"custom.width","value":176}]},{"matcher":{"id":"byName","options":"pod_ip"},"properties":[{"id":"custom.width","value":157}]},{"matcher":{"id":"byName","options":"created_by_kind"},"properties":[{"id":"custom.width","value":205}]},{"matcher":{"id":"byName","options":"namespace"},"properties":[{"id":"custom.width","value":263}]}]},"gridPos":{"h":8,"w":12,"x":12,"y":1},"id":5,"options":{"footer":{"fields":"","reducer":["sum"],"show":false},"showHeader":true,"sortBy":[]},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"kube_pod_info{node=\"
$node\"}","format":"table","interval":"","legendFormat":"","refId":"A"}],"title":"List of pods on node ($node)","transformations":[{"id":"organize","options":{"excludeByName":{"Time":true,"Value":true,"__name__":true,"container":true,"created_by_kind":false,"created_by_name":true,"endpoint":true,"env":true,"host_ip":true,"host_network":true,"instance":true,"job":true,"node":true,"project":true,"prometheus_replica":true,"service":true,"uid":true},"indexByName":{"Time":6,"Value":20,"__name__":7,"container":8,"created_by_kind":2,"created_by_name":9,"endpoint":10,"env":11,"host_ip":5,"host_network":12,"instance":13,"job":14,"namespace":1,"node":15,"pod":0,"pod_ip":3,"priority_class":4,"project":16,"prometheus_replica":17,"service":18,"uid":19},"renameByName":{}}},{"id":"groupBy","options":{"fields":{"created_by_kind":{"aggregations":[],"operation":"groupby"},"host_ip":{"aggregations":[],"operation":"groupby"},"namespace":{"aggregations":["last"],"operation":"groupby"},"pod":{"aggregations":[],"operation":"groupby"},"pod_ip":{"aggregations":[],"operation":"groupby"},"priority_class":{"aggregations":[],"operation":"groupby"}}}}],"type":"table"},{"collapsed":false,"datasource":{"type":"prometheus","uid":"prometheus"},"gridPos":{"h":1,"w":24,"x":0,"y":9},"id":38,"panels":[],"title":"Resources","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"CPU 
Cores","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"none"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":10},"id":26,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(rate(container_cpu_usage_seconds_total{node=\"$node\", image!=\"\"}[$__rate_interval])) by (pod)","interval":"$resolution","legendFormat":"{{ pod }}","refId":"A"}],"title":"CPU usage by 
Pod","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":10},"id":28,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(container_memory_working_set_bytes{node=\"$node\", image!=\"\"}) by (pod)","interval":"$resolution","legendFormat":"{{ pod }}","refId":"A"}],"title":"Memory usage by Pod","type":"timeseries"}],"refresh":"30s","schemaVersion":34,"style":"dark","tags":["Kubernetes","Prometheus"],"templating":{"list":[{"current":{"isNone":true,"selected":false,"text":"None","value":""},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(node_uname_info, job)","hide":2,"includeAll":false,"multi":false,"name":"job","options":[],"query":{"query":"label_values(node_uname_info, 
job)","refId":"StandardVariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":1,"type":"query"},{"current":{"selected":false,"text":"Prometheus","value":"Prometheus"},"hide":0,"includeAll":false,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"current":{"selected":true,"text":"30s","value":"30s"},"hide":0,"includeAll":false,"multi":false,"name":"resolution","options":[{"selected":false,"text":"1s","value":"1s"},{"selected":false,"text":"15s","value":"15s"},{"selected":true,"text":"30s","value":"30s"},{"selected":false,"text":"1m","value":"1m"},{"selected":false,"text":"3m","value":"3m"},{"selected":false,"text":"5m","value":"5m"}],"query":"1s, 15s, 30s, 1m, 3m, 5m","queryValue":"","skipUrlSync":false,"type":"custom"},{"current":{"selected":false,"text":"gk3-cluster-default-pool-30d45773-8vk4","value":"gk3-cluster-default-pool-30d45773-8vk4"},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(kube_node_info, node)","hide":0,"includeAll":false,"multi":false,"name":"node","options":[],"query":{"query":"label_values(kube_node_info, node)","refId":"StandardVariableQuery"},"refresh":2,"regex":"","skipUrlSync":false,"sort":1,"type":"query"},{"current":{"isNone":true,"selected":false,"text":"None","value":""},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(node_uname_info{nodename=~\"(?i:($node))\"}, instance)","hide":2,"includeAll":false,"multi":false,"name":"instance","options":[],"query":{"query":"label_values(node_uname_info{nodename=~\"(?i:($node))\"}, instance)","refId":"StandardVariableQuery"},"refresh":2,"regex":"","skipUrlSync":false,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"k8s / view / nodes","uid":"k8s_view_nodes","version":4,"weekStart":""} \ No newline at end of file diff --git 
a/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-pods.json b/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-pods.json new file mode 100644 index 00000000..af3f9145 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/dashboards/k8s-pods.json @@ -0,0 +1 @@ +{"annotations":{"list":[{"builtIn":1,"datasource":{"type":"datasource","uid":"grafana"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"description":"This is a modern 'Pods View' dashboard for your Kubernetes cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes","editable":true,"fiscalYearStartMonth":0,"gnetId":15760,"graphTooltip":1,"id":8,"iteration":1677857508072,"links":[],"liveNow":false,"panels":[{"collapsed":false,"datasource":{"type":"datasource","uid":"grafana"},"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":43,"panels":[],"targets":[{"datasource":{"type":"datasource","uid":"grafana"},"refId":"A"}],"title":"Information","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"rgb(255, 255, 255)","value":null}]},"unit":"none"},"overrides":[]},"gridPos":{"h":2,"w":12,"x":0,"y":1},"id":2,"options":{"colorMode":"none","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"name"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"kube_pod_info{namespace=\"$namespace\", pod=\"$pod\"}","interval":"","legendFormat":"{{ created_by_kind }}: {{ created_by_name }}","refId":"A"}],"title":"Created 
by","type":"stat"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"rgb(255, 255, 255)","value":null}]},"unit":"none"},"overrides":[]},"gridPos":{"h":2,"w":6,"x":12,"y":1},"id":33,"options":{"colorMode":"none","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"name"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"kube_pod_info{namespace=\"$namespace\", pod=\"$pod\"}","interval":"","legendFormat":"{{ node }}","refId":"A"}],"title":"Running on","type":"stat"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"rgb(255, 255, 255)","value":null}]},"unit":"none"},"overrides":[]},"gridPos":{"h":2,"w":6,"x":18,"y":1},"id":41,"options":{"colorMode":"none","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"name"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"kube_pod_info{namespace=\"$namespace\", pod=\"$pod\"}","interval":"","legendFormat":"{{ pod_ip }}","refId":"A"}],"title":"Pod 
IP","type":"stat"},{"collapsed":false,"datasource":{"type":"datasource","uid":"grafana"},"gridPos":{"h":1,"w":24,"x":0,"y":3},"id":47,"panels":[],"targets":[{"datasource":{"type":"datasource","uid":"grafana"},"refId":"A"}],"title":"Resources","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"fixedColor":"blue","mode":"fixed"},"decimals":2,"mappings":[],"max":1,"min":0,"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"#EAB839","value":60},{"color":"red","value":75}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":3,"x":0,"y":4},"id":39,"options":{"orientation":"auto","reduceOptions":{"calcs":["last"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":false,"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}[$__rate_interval])) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", unit=\"core\"})","instant":true,"interval":"$resolution","legendFormat":"Requests","refId":"A"}],"title":"Total pod CPU Requests usage","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"decimals":2,"mappings":[],"max":1,"min":0,"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"#EAB839","value":60},{"color":"red","value":75}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":3,"x":3,"y":4},"id":48,"options":{"orientation":"auto","reduceOptions":{"calcs":["last"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":false,"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}[$__rate_interval])) / 
sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", unit=\"core\"})","instant":true,"interval":"$resolution","legendFormat":"Limits","refId":"A"}],"title":"Total pod CPU Limits usage","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"fixedColor":"blue","mode":"fixed"},"decimals":2,"mappings":[],"max":1,"min":0,"thresholds":{"mode":"percentage","steps":[{"color":"blue","value":null},{"color":"#EAB839","value":80},{"color":"red","value":99}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":3,"x":6,"y":4},"id":40,"options":{"orientation":"auto","reduceOptions":{"calcs":["last"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":false,"expr":"sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", unit=\"byte\"})","instant":true,"interval":"$resolution","legendFormat":"Requests","refId":"A"}],"title":"Total pod RAM Requests usage","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"decimals":2,"mappings":[],"max":1,"min":0,"thresholds":{"mode":"percentage","steps":[{"color":"green","value":null},{"color":"#EAB839","value":60},{"color":"red","value":75}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":3,"x":9,"y":4},"id":49,"options":{"orientation":"auto","reduceOptions":{"calcs":["last"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":false,"expr":"sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", 
unit=\"byte\"}) ","instant":true,"interval":"$resolution","legendFormat":"Limits","refId":"B"}],"title":"Total pod RAM Limits usage","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"align":"auto","displayMode":"auto","filterable":false},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"blue","value":null}]},"unit":"none"},"overrides":[{"matcher":{"id":"byName","options":"Memory Requests"},"properties":[{"id":"unit","value":"bytes"}]},{"matcher":{"id":"byName","options":"Memory Limits"},"properties":[{"id":"unit","value":"bytes"}]},{"matcher":{"id":"byName","options":"Memory Used"},"properties":[{"id":"unit","value":"bytes"}]}]},"gridPos":{"h":8,"w":12,"x":12,"y":4},"id":38,"options":{"footer":{"fields":"","reducer":["sum"],"show":false},"showHeader":true,"sortBy":[]},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":false,"expr":"sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", unit=\"core\"}) by (container)","format":"table","instant":true,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":false,"expr":"sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", unit=\"core\"}) by (container)","format":"table","instant":true,"interval":"","intervalFactor":1,"legendFormat":"","refId":"B"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":false,"expr":"sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", unit=\"byte\"}) by (container)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"C"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":false,"expr":"sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", unit=\"byte\"}) by 
(container)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"D"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\", container!=\"POD\"}[$__rate_interval])) by (container)","format":"table","hide":false,"instant":true,"legendFormat":"__auto","range":false,"refId":"E"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":false,"expr":"sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\", container!=\"POD\"}) by (container)","format":"table","hide":false,"instant":true,"range":false,"refId":"F"}],"title":"Resources by container","transformations":[{"id":"seriesToColumns","options":{"byField":"container"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 4":true,"__name__":true,"__name__ 1":true,"__name__ 2":true,"__name__ 3":true,"__name__ 4":true,"container":false,"endpoint":true,"endpoint 2":true,"endpoint 3":true,"endpoint 4":true,"instance":true,"instance 2":true,"instance 3":true,"instance 4":true,"job":true,"job 2":true,"job 3":true,"job 4":true,"namespace":true,"namespace 2":true,"namespace 3":true,"namespace 4":true,"node":true,"node 2":true,"node 3":true,"node 4":true,"pod":true,"pod 2":true,"pod 3":true,"pod 4":true,"resource 1":true,"resource 2":true,"resource 3":true,"resource 4":true,"service":true,"service 2":true,"service 3":true,"service 4":true,"uid 1":true,"uid 2":true,"uid 3":true,"uid 4":true,"unit 1":true,"unit 2":true,"unit 3":true,"unit 4":true},"indexByName":{"Time 1":7,"Time 2":8,"Time 3":9,"Time 4":10,"Time 5":11,"Time 6":12,"Value #A":2,"Value #B":3,"Value #C":5,"Value #D":6,"Value #E":1,"Value #F":4,"container":0},"renameByName":{"Value #A":"CPU Requests","Value #B":"CPU Limits","Value #C":"Memory Requests","Value #D":"Memory Limits","Value #E":"CPU 
Used","Value #F":"Memory Used","container":"Container"}}}],"type":"table"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"custom":{"axisLabel":"Percent","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"area"}},"mappings":[],"max":1,"min":0,"thresholds":{"mode":"percentage","steps":[{"color":"red","value":null},{"color":"yellow","value":20},{"color":"green","value":30},{"color":"yellow","value":70},{"color":"red","value":80}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":12},"id":50,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":true,"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", unit=\"core\"}) by (container)","interval":"$resolution","legendFormat":"{{ container }} REQUESTS","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", unit=\"core\"}) by (container)","hide":false,"legendFormat":"{{ container }} LIMITS","range":true,"refId":"B"}],"title":"CPU Usage / Requests & Limits by 
container","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"fixedColor":"blue","mode":"thresholds"},"custom":{"axisLabel":"Percent","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"area"}},"mappings":[],"max":1,"min":0,"thresholds":{"mode":"percentage","steps":[{"color":"red","value":null},{"color":"yellow","value":20},{"color":"green","value":30},{"color":"#EAB839","value":70},{"color":"red","value":80}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":12},"id":30,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","exemplar":true,"expr":"sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", unit=\"byte\"}) by (container)","interval":"","legendFormat":"{{ container }} REQUESTS","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"editorMode":"code","expr":"sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", unit=\"byte\"}) by (container)","hide":false,"legendFormat":"{{ container }} LIMITS","range":true,"refId":"B"}],"title":"Memory Usage / Requests & Limits by 
container","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"CPU Cores","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"none"},"overrides":[{"matcher":{"id":"byName","options":"limit"},"properties":[{"id":"color","value":{"fixedColor":"#F2495C","mode":"fixed"}},{"id":"custom.fillOpacity","value":0}]}]},"gridPos":{"h":8,"w":12,"x":0,"y":20},"id":29,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\", container!=\"POD\"}[$__rate_interval])) by (container)","interval":"$resolution","legendFormat":"{{ container }}","refId":"A"}],"title":"CPU Usage by 
container","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"Bytes","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineStyle":{"fill":"solid"},"lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":20},"id":51,"options":{"legend":{"calcs":[],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\", container!=\"POD\"}) by (container)","interval":"","legendFormat":"{{ container }}","refId":"A"}],"title":"Memory Usage by 
container","type":"timeseries"},{"collapsed":false,"datasource":{"type":"datasource","uid":"grafana"},"gridPos":{"h":1,"w":24,"x":0,"y":28},"id":45,"panels":[],"targets":[{"datasource":{"type":"datasource","uid":"grafana"},"refId":"A"}],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":29},"id":31,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","interval":"$resolution","legendFormat":"Received","refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"- sum(rate(container_network_transmit_bytes_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","interval":"$resolution","legendFormat":"Transmitted","refId":"B"}],"title":"Network - 
Bandwidth","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":29},"id":34,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(rate(container_network_receive_packets_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","interval":"$resolution","legendFormat":"Received","refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"- sum(rate(container_network_transmit_packets_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","interval":"$resolution","legendFormat":"Transmitted","refId":"B"}],"title":"Network - Packets 
Rate","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":37},"id":36,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(rate(container_network_receive_packets_dropped_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","interval":"$resolution","legendFormat":"Received","refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"- sum(rate(container_network_transmit_packets_dropped_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","interval":"$resolution","legendFormat":"Transmitted","refId":"B"}],"title":"Network - Packets 
Dropped","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":25,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineInterpolation":"smooth","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":37},"id":37,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"sum(rate(container_network_receive_errors_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","interval":"$resolution","legendFormat":"Received","refId":"A"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"exemplar":true,"expr":"- sum(rate(container_network_transmit_errors_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","interval":"$resolution","legendFormat":"Transmitted","refId":"B"}],"title":"Network - Errors","type":"timeseries"}],"refresh":"30s","schemaVersion":34,"style":"dark","tags":["Kubernetes","Prometheus"],"templating":{"list":[{"current":{"selected":false,"text":"Prometheus","value":"Prometheus"},"hide":0,"includeAll":false,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"current":{"selected":false,"text":"monitoring","value":"monitoring"},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(kube_pod_info, 
namespace)","hide":0,"includeAll":false,"multi":false,"name":"namespace","options":[],"query":{"query":"label_values(kube_pod_info, namespace)","refId":"Prometheus-namespace-Variable-Query"},"refresh":1,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false},{"current":{"selected":false,"text":"custom-metrics-stackdriver-adapter-5d95c47588-h2xlx","value":"custom-metrics-stackdriver-adapter-5d95c47588-h2xlx"},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(kube_pod_info{namespace=\"$namespace\"}, pod)","hide":0,"includeAll":false,"multi":false,"name":"pod","options":[],"query":{"query":"label_values(kube_pod_info{namespace=\"$namespace\"}, pod)","refId":"Prometheus-pod-Variable-Query"},"refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false},{"current":{"selected":true,"text":"30s","value":"30s"},"hide":0,"includeAll":false,"multi":false,"name":"resolution","options":[{"selected":false,"text":"1s","value":"1s"},{"selected":false,"text":"15s","value":"15s"},{"selected":true,"text":"30s","value":"30s"},{"selected":false,"text":"1m","value":"1m"},{"selected":false,"text":"3m","value":"3m"},{"selected":false,"text":"5m","value":"5m"}],"query":"1s, 15s, 30s, 1m, 3m, 5m","queryValue":"","skipUrlSync":false,"type":"custom"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"k8s / view / pods","uid":"k8s_view_pods","version":1,"weekStart":""} \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/dashboards/locust.json b/blueprints/gke/autopilot/bundle/monitoring/dashboards/locust.json new file mode 100644 index 00000000..ba95c099 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/dashboards/locust.json @@ -0,0 +1 @@ +{"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 
1)","limit":100,"name":"Annotations & Alerts","showIn":0,"target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"description":"Locust Exporter - Container Solutions","editable":true,"fiscalYearStartMonth":0,"graphTooltip":0,"id":8,"links":[],"liveNow":false,"panels":[{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[{"options":{"0":{"text":"Stop"},"1":{"text":"Hatching"},"2":{"text":"Running"}},"type":"value"}],"thresholds":{"mode":"absolute","steps":[{"color":"rgba(245, 54, 54, 0.9)","value":null},{"color":"rgba(237, 129, 40, 0.89)","value":0},{"color":"rgba(50, 172, 45, 0.97)","value":2}]},"unit":"none"},"overrides":[]},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":1,"links":[],"maxDataPoints":100,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"horizontal","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"expr":"locust_running","interval":"","intervalFactor":2,"legendFormat":"","metric":"","refId":"A","step":20}],"title":"Locust Status","type":"stat"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"fieldConfig":{"defaults":{"color":{"fixedColor":"rgb(31, 120, 193)","mode":"fixed"},"mappings":[{"options":{"match":"null","result":{"text":"N/A"}},"type":"special"}],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"none"},"overrides":[]},"gridPos":{"h":4,"w":4,"x":4,"y":0},"id":2,"links":[],"maxDataPoints":100,"options":{"colorMode":"none","graphMode":"area","justifyMode":"auto","orientation":"horizontal","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"expr":"locust_users","interval":"","intervalFactor":2,"legendFormat":"","refId":"A","step":20}],"title":"Swarmed 
users","type":"stat"},{"fieldConfig":{"defaults":{"color":{"fixedColor":"rgb(31, 120, 193)","mode":"fixed"},"decimals":2,"mappings":[{"options":{"match":"null","result":{"text":"N/A"}},"type":"special"}],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":7,"w":6,"x":8,"y":0},"id":4,"links":[],"maxDataPoints":100,"options":{"colorMode":"none","graphMode":"area","justifyMode":"auto","orientation":"horizontal","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":true,"expr":"locust_requests_current_rps{method=~\"\"}","interval":"","intervalFactor":2,"legendFormat":"","refId":"A","step":20}],"title":"Current RPS","type":"stat"},{"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]}},"overrides":[]},"gridPos":{"h":4,"w":4,"x":14,"y":0},"id":18,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":true,"expr":"locust_requests_num_requests{method=~\"\"}","interval":"","legendFormat":"","refId":"A"}],"title":"Requests","type":"stat"},{"fieldConfig":{"defaults":{"mappings":[],"max":1,"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"semi-dark-red","value":null}]},"unit":"percentunit"},"overrides":[]},"gridPos":{"h":7,"w":6,"x":18,"y":0},"id":21,"options":{"orientation":"auto","reduceOptions":{"calcs":["last"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":true,"expr":"locust_requests_fail_ratio","interval":"","l
egendFormat":"","refId":"A"}],"title":"Fails","type":"gauge"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"fieldConfig":{"defaults":{"color":{"fixedColor":"rgb(31, 120, 193)","mode":"fixed"},"mappings":[{"options":{"match":"null","result":{"text":"N/A"}},"type":"special"}],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"none"},"overrides":[]},"gridPos":{"h":3,"w":4,"x":4,"y":4},"id":3,"links":[],"maxDataPoints":100,"options":{"colorMode":"none","graphMode":"area","justifyMode":"auto","orientation":"horizontal","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"expr":"locust_workers_count","interval":"","intervalFactor":2,"legendFormat":"","refId":"A","step":20}],"title":"Connected workers","type":"stat"},{"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"red","value":null}]}},"overrides":[]},"gridPos":{"h":3,"w":4,"x":14,"y":4},"id":19,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"textMode":"auto"},"pluginVersion":"8.3.4","targets":[{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":true,"expr":"locust_requests_num_failures{method=~\"\"}","interval":"","legendFormat":"","refId":"A"}],"title":"Failers","type":"stat"},{"columns":[],"fontSize":"100%","gridPos":{"h":6,"w":24,"x":0,"y":7},"id":14,"showHeader":true,"sort":{"col":0,"desc":true},"styles":[{"alias":"Time","align":"auto","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Method","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD 
HH:mm:ss","decimals":2,"mappingType":1,"pattern":"method","preserveFormat":false,"thresholds":[],"type":"string","unit":"short"},{"alias":"URL","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"name","thresholds":[],"type":"string","unit":"short"},{"alias":"MIN RT","align":"","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"mappingType":1,"pattern":"Value #A","thresholds":[],"type":"number","unit":"ms"},{"alias":"Errors","align":"","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"decimals":0,"pattern":"Value #B","thresholds":[],"type":"number","unit":"none"},{"alias":"MAX RT","align":"","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"mappingType":1,"pattern":"Value #C","thresholds":[],"type":"number","unit":"ms"},{"alias":"MEDIAN RT","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"mappingType":1,"pattern":"Value #D","thresholds":[],"type":"number","unit":"ms"},{"alias":"AVG RT","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"Value #E","thresholds":[],"type":"number","unit":"ms"},{"alias":"Errors Ratio","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"Value #H","thresholds":[],"type":"number","unit":"reqps"},{"alias":"Requests","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD 
HH:mm:ss","decimals":0,"mappingType":1,"pattern":"Value #F","thresholds":[],"type":"number","unit":"none"},{"alias":"Requests Ratio","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"Value #G","thresholds":[],"type":"number","unit":"reqps"},{"alias":"Content","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"Value #I","thresholds":[],"type":"number","unit":"bytes"}],"targets":[{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":false,"expr":"sum(locust_requests_avg_content_length{method=~\".+\"}) by (method, name)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"I"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":false,"expr":"sum(locust_requests_min_response_time{method=~\".+\"}) by (method, name)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"A"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":false,"expr":"sum(locust_requests_max_response_time{method=~\".+\"}) by (method, name)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"C"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":false,"expr":"sum(locust_requests_avg_response_time{method=~\".+\"}) by (method, name)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"E"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":false,"expr":"sum(locust_requests_median_response_time{method=~\".+\"}) by (method, name)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"D"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":false,"expr":"sum(locust_requests_num_failures{method=~\".+\"}) by (method, 
name)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"B"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":false,"expr":"sum(locust_requests_current_fail_per_sec{method=~\".+\"}) by (method, name)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"H"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":false,"expr":"sum(locust_requests_num_requests{method=~\".+\"}) by (method, name)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"F"},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":false,"expr":"sum(locust_requests_current_rps{method=~\".+\"}) by (method, name)","format":"table","instant":true,"interval":"","legendFormat":"","refId":"G"}],"title":"Endpoints","transform":"table","type":"table-old"},{"columns":[],"fontSize":"100%","gridPos":{"h":3,"w":24,"x":0,"y":13},"id":16,"showHeader":true,"sort":{"col":0,"desc":true},"styles":[{"alias":"Time","align":"auto","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Requests","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"mappingType":1,"pattern":"Value","thresholds":[],"type":"number","unit":"none"},{"alias":"Method","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"method","thresholds":[],"type":"string","unit":"short"},{"alias":"URL","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"mappingType":1,"pattern":"name","thresholds":[],"type":"string","unit":"short"},{"alias":"Error","align":"auto","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD 
HH:mm:ss","decimals":2,"link":false,"mappingType":1,"pattern":"error","thresholds":[],"type":"string","unit":"short"},{"alias":"","align":"left","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"decimals":2,"pattern":"/.*/","thresholds":[],"type":"number","unit":"short"}],"targets":[{"expr":"sum(locust_errors) by (method, name, error)","format":"table","hide":false,"instant":true,"interval":"","intervalFactor":1,"legendFormat":"","refId":"A"}],"title":"Errors","transform":"table","type":"table-old"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"decimals":0,"editable":true,"error":false,"fieldConfig":{"defaults":{"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":17,"x":0,"y":16},"hiddenSeries":false,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"8.3.4","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"AVG MAX","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":true,"expr":"avg(locust_requests_max_response_time{method=~\".+\"})","interval":"","intervalFactor":2,"legendFormat":"AVG MAX","refId":"A","step":2},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":true,"expr":"avg(locust_requests_min_response_time{method=~\".+\"})","interval":"","intervalFactor":2,"legendFormat":"AVG MIN","metric":"","refId":"B","step":2},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":true,"expr":"avg(locust_requests_avg_response_time{method=~\".+\"})","interval":"","intervalFactor":2,"legendFormat":"AVG 
AVG","metric":"","refId":"C","step":2},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":true,"expr":"avg(locust_requests_median_response_time{method=~\".+\"})","interval":"","intervalFactor":2,"legendFormat":"AVG MEDIAN","refId":"D","step":2}],"thresholds":[],"timeRegions":[],"title":"Response Times","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"mode":"time","show":true,"values":[]},"yaxes":[{"decimals":0,"format":"ms","logBase":1,"show":true},{"decimals":0,"format":"ms","logBase":1,"show":true}],"yaxis":{"align":false}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"decimals":0,"editable":true,"error":false,"fieldConfig":{"defaults":{"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":7,"x":17,"y":16},"hiddenSeries":false,"id":15,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"connected","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"8.3.4","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"AVG MAX","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"exemplar":true,"expr":"locust_requests_current_response_time_percentile_95","interval":"","intervalFactor":2,"legendFormat":"P95","refId":"D","step":2},{"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"expr":"locust_requests_current_response_time_percentile_50","interval":"","legendFormat":"P50","refId":"A"}],"thresholds":[],"timeRegions":[],"title":"Response 
Times","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"mode":"time","show":true,"values":[]},"yaxes":[{"decimals":0,"format":"ms","logBase":1,"show":true},{"decimals":0,"format":"ms","logBase":1,"show":true}],"yaxis":{"align":false}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"decimals":0,"fieldConfig":{"defaults":{"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":0,"y":24},"hiddenSeries":false,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"8.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"locust_users","format":"time_series","interval":"","intervalFactor":1,"legendFormat":"Users","refId":"A"}],"thresholds":[],"timeRegions":[],"title":"Users","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"mode":"time","show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"decimals":0,"fieldConfig":{"defaults":{"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":8,"w":12,"x":12,"y":24},"hiddenSeries":false,"id":12,"interval":"","legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{"alertThreshold":true},"paceLength":10,"percentage":false,"pluginVersion":"8.3.4","pointradius":2,"points":false,"renderer":"flot","seriesOverrides
":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"locust_workers_count","format":"time_series","instant":false,"interval":"","intervalFactor":1,"legendFormat":"Workers","refId":"A"}],"thresholds":[],"timeRegions":[],"title":"Workers","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"mode":"time","show":true,"values":[]},"yaxes":[{"decimals":0,"format":"short","logBase":1,"show":true},{"decimals":0,"format":"short","logBase":1,"show":false}],"yaxis":{"align":false}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":{"type":"prometheus","uid":"PBFA97CFB590B2093"},"editable":true,"error":false,"fieldConfig":{"defaults":{"links":[]},"overrides":[]},"fill":1,"fillGradient":0,"gridPos":{"h":7,"w":24,"x":0,"y":32},"hiddenSeries":false,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"connected","paceLength":10,"percentage":false,"pluginVersion":"8.3.4","pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"fail_ratio [%]","yaxis":2}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(locust_requests_num_requests{method=~\".+\"}[1m])","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ method}} - {{name}}","refId":"A","step":2},{"expr":" sum(rate(locust_requests_num_requests{method=~\".+\"}[1m]))","format":"time_series","interval":"","intervalFactor":1,"legendFormat":"total","refId":"C"},{"expr":"locust_requests_fail_ratio * 100","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"fail_ratio [%]","refId":"B","step":2}],"thresholds":[],"timeRegions":[],"title":"Requests per endpoint / 
s","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"mode":"time","show":true,"values":[]},"yaxes":[{"format":"reqps","logBase":1,"show":true},{"format":"percent","logBase":1,"min":"0","show":true}],"yaxis":{"align":false}}],"refresh":"10s","schemaVersion":34,"style":"dark","tags":["locust"],"templating":{"list":[]},"time":{"from":"now-15m","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"locust / view","uid":"0WllLp6mz","version":4,"weekStart":""} \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/dashboards/nginx.json b/blueprints/gke/autopilot/bundle/monitoring/dashboards/nginx.json new file mode 100644 index 00000000..50f74362 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/dashboards/nginx.json @@ -0,0 +1 @@ +{"__inputs":[{"description":"","label":"Prometheus","name":"DS_PROMETHEUS","pluginId":"prometheus","pluginName":"Prometheus","type":"datasource"}],"__requires":[{"id":"grafana","name":"Grafana","type":"grafana","version":"5.0.0"},{"id":"graph","name":"Graph","type":"panel","version":""},{"id":"prometheus","name":"Prometheus","type":"datasource","version":"1.0.0"},{"id":"singlestat","name":"Singlestat","type":"panel","version":""}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"Official dashboard for NGINX Prometheus 
exporter","editable":true,"gnetId":null,"graphTooltip":0,"id":null,"iteration":1562682051068,"links":[],"panels":[{"collapsed":false,"datasource":"${DS_PROMETHEUS}","gridPos":{"h":1,"w":24,"x":0,"y":0},"id":4,"panels":[],"title":"Status","type":"row"},{"cacheTimeout":null,"colorBackground":true,"colorPostfix":false,"colorPrefix":false,"colorValue":false,"colors":["#E02F44","#FF9830","#299c46"],"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":12,"x":0,"y":1},"id":8,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"options":{},"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"repeat":"instance","repeatDirection":"h","sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"nginx_up{instance=~\"$instance\"}","format":"time_series","instant":false,"intervalFactor":1,"refId":"A"}],"thresholds":"1,1","timeFrom":null,"timeShift":null,"title":"NGINX Status for 
$instance","type":"singlestat","valueFontSize":"100%","valueMaps":[{"op":"=","text":"Down","value":"0"},{"op":"=","text":"Up","value":"1"}],"valueName":"current"},{"collapsed":false,"datasource":"${DS_PROMETHEUS}","gridPos":{"h":1,"w":24,"x":0,"y":4},"id":6,"panels":[],"title":"Metrics","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":1,"gridPos":{"h":10,"w":12,"x":0,"y":5},"id":10,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(nginx_connections_accepted{instance=~\"$instance\"}[5m])","format":"time_series","instant":false,"intervalFactor":1,"legendFormat":"{{instance}} accepted","refId":"A"},{"expr":"irate(nginx_connections_handled{instance=~\"$instance\"}[5m])","format":"time_series","instant":false,"intervalFactor":1,"legendFormat":"{{instance}} handled","refId":"B"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Processed connections","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":1,"format":"short","label":"Connections 
(rate)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":0,"fill":1,"gridPos":{"h":10,"w":12,"x":12,"y":5},"id":12,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"nginx_connections_active{instance=~\"$instance\"}","format":"time_series","intervalFactor":1,"legendFormat":"{{instance}} active","refId":"A"},{"expr":"nginx_connections_reading{instance=~\"$instance\"}","format":"time_series","intervalFactor":1,"legendFormat":"{{instance}} reading","refId":"B"},{"expr":"nginx_connections_waiting{instance=~\"$instance\"}","format":"time_series","intervalFactor":1,"legendFormat":"{{instance}} waiting","refId":"C"},{"expr":"nginx_connections_writing{instance=~\"$instance\"}","format":"time_series","intervalFactor":1,"legendFormat":"{{instance}} writing","refId":"D"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Active 
Connections","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":0,"format":"short","label":"Connections","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":8,"w":24,"x":0,"y":15},"id":15,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","options":{},"percentage":false,"pointradius":2,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(nginx_http_requests_total{instance=~\"$instance\"}[5m])","format":"time_series","intervalFactor":1,"legendFormat":"{{instance}} total requests","refId":"A"}],"thresholds":[],"timeFrom":null,"timeRegions":[],"timeShift":null,"title":"Total requests","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":"5s","schemaVersion":18,"style":"dark","tags":["nginx","prometheus","nginx prometheus exporter"],"templating":{"list":[{"current":{"selected":false,"tags":[],"text":"default","value":"default"},"hide":0,"includeAll":false,"label":"datasource","multi":false,"name":"DS_PROMETHEUS","options":[],"query":"prometheus","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","definition":"label_values(nginx_up, 
instance)","hide":0,"includeAll":true,"label":"","multi":true,"name":"instance","options":[],"query":"label_values(nginx_up, instance)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-15m","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"","title":"nginx / view","uid":"nginx_view","version":1} \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/frontend.yaml b/blueprints/gke/autopilot/bundle/monitoring/frontend.yaml new file mode 100644 index 00000000..d37b2e0f --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/frontend.yaml @@ -0,0 +1,79 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: frontend + namespace: monitoring +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: frontend + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: frontend + template: + metadata: + labels: + app: frontend + spec: + serviceAccountName: frontend + tolerations: + - key: group + operator: Equal + value: monitoring + effect: NoSchedule + nodeSelector: + group: monitoring + automountServiceAccountToken: true + containers: + - name: frontend + image: "gke.gcr.io/prometheus-engine/frontend:v0.5.0-gke.0" + args: + - "--web.listen-address=:9090" + ports: + - name: web + containerPort: 9090 + resources: + requests: + cpu: 10m + memory: 15Mi + limits: + memory: 15Mi + readinessProbe: + httpGet: + path: /-/ready + port: web + livenessProbe: + httpGet: + path: /-/healthy + port: web +--- +apiVersion: v1 +kind: Service +metadata: + name: frontend + namespace: monitoring +spec: + clusterIP: None + selector: + app: frontend + ports: + - name: web + port: 9090 \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/grafana.yaml b/blueprints/gke/autopilot/bundle/monitoring/grafana.yaml new file mode 100644 index 00000000..dd4bfb03 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/grafana.yaml @@ -0,0 +1,184 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana + namespace: monitoring +data: + allow-snippet-annotations: "false" + grafana.ini: | + [analytics] + check_for_updates = true + [grafana_net] + url = https://grafana.net + [log] + mode = console + [paths] + data = /var/lib/grafana/ + logs = /var/log/grafana + plugins = /var/lib/grafana/plugins + provisioning = /etc/grafana/provisioning + datasources.yaml: | + apiVersion: 1 + datasources: + - access: proxy + editable: true + isDefault: true + jsonData: + timeInterval: 5s + name: Prometheus + orgId: 1 + type: prometheus + url: http://frontend.monitoring.svc.cluster.local:9090 + dashboardproviders.yaml: | + apiVersion: 1 + providers: + - disableDeletion: false + folder: k8s + name: k8s + options: + path: /var/lib/grafana/dashboards/k8s + orgId: 1 + type: file + - disableDeletion: false + folder: locust + name: locust + options: + path: /var/lib/grafana/dashboards/locust + orgId: 1 + type: file + - disableDeletion: false + folder: nginx + name: nginx + options: + path: /var/lib/grafana/dashboards/nginx + orgId: 1 + type: file +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + spec: + tolerations: + - key: group + operator: Equal + value: monitoring + effect: NoSchedule + nodeSelector: + group: monitoring + containers: + - name: grafana + image: grafana/grafana:8.3.4 + ports: + - name: web + containerPort: 3000 + env: + - name: GF_PATHS_DATA + value: /var/lib/grafana/ + - name: GF_PATHS_LOGS + value: /var/log/grafana + - name: GF_PATHS_PLUGINS + value: /var/lib/grafana/plugins + - name: GF_PATHS_PROVISIONING + value: /etc/grafana/provisioning + - name: "GF_AUTH_ANONYMOUS_ENABLED" + value: "true" + - name: "GF_AUTH_ANONYMOUS_ORG_ROLE" + value: "Admin" + - name: "GF_AUTH_BASIC_ENABLED" + value: "false" + - name: "GF_SECURITY_ADMIN_PASSWORD" + value: "-" + 
- name: "GF_SECURITY_ADMIN_USER" + value: "-" + volumeMounts: + - name: config + mountPath: "/etc/grafana/grafana.ini" + subPath: grafana.ini + - name: storage + mountPath: "/var/lib/grafana" + - name: k8s-grafana-dashboards + mountPath: "/var/lib/grafana/dashboards/k8s" + - name: locust-grafana-dashboards + mountPath: "/var/lib/grafana/dashboards/locust" + - name: nginx-grafana-dashboards + mountPath: "/var/lib/grafana/dashboards/nginx" + - name: config + mountPath: "/etc/grafana/provisioning/datasources/datasources.yaml" + subPath: "datasources.yaml" + - name: config + mountPath: "/etc/grafana/provisioning/dashboards/dashboardproviders.yaml" + subPath: "dashboardproviders.yaml" + resources: + requests: + cpu: 30m + memory: 100Mi + limits: + memory: 100Mi + livenessProbe: + failureThreshold: 10 + httpGet: + path: /api/health + port: 3000 + initialDelaySeconds: 60 + timeoutSeconds: 30 + readinessProbe: + httpGet: + path: /api/health + port: 3000 + volumes: + - name: config + configMap: + name: grafana + - name: k8s-grafana-dashboards + configMap: + name: k8s-grafana-dashboards + - name: locust-grafana-dashboards + configMap: + name: locust-grafana-dashboards + - name: nginx-grafana-dashboards + configMap: + name: nginx-grafana-dashboards + - name: storage + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: grafana + namespace: monitoring + annotations: + cloud.google.com/neg: '{"ingress": true}' + cloud.google.com/app-protocols: '{"web":"HTTP"}' + cloud.google.com/backend-config: '{"default": "backendconfig"}' +spec: + clusterIP: None + selector: + app: grafana + ports: + - name: web + port: 3000 \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/ingress.yaml b/blueprints/gke/autopilot/bundle/monitoring/ingress.yaml new file mode 100644 index 00000000..12b810a2 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/ingress.yaml @@ -0,0 +1,43 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +apiVersion: cloud.google.com/v1 +kind: BackendConfig +metadata: + name: backendconfig + namespace: monitoring +spec: + healthCheck: + requestPath: /api/health + port: 3000 + type: HTTP + logging: + enable: true + sampleRate: 0.5 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: ingress + namespace: monitoring + annotations: + kubernetes.io/ingress.global-static-ip-name: "grafana" + kubernetes.io/ingress.allow-http: "true" +spec: + defaultBackend: + service: + name: grafana + port: + name: web \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/kube-state-metrics.yaml b/blueprints/gke/autopilot/bundle/monitoring/kube-state-metrics.yaml new file mode 100644 index 00000000..d74bdb75 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/kube-state-metrics.yaml @@ -0,0 +1,342 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + namespace: gmp-public + name: kube-state-metrics +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + serviceName: kube-state-metrics + template: + metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + - amd64 + - key: kubernetes.io/os + operator: In + values: + - linux + containers: + - name: kube-state-metric + image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.3.0 + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + args: + - --pod=$(POD_NAME) + - --pod-namespace=$(POD_NAMESPACE) + - --port=8080 + - --telemetry-port=8081 + ports: + - name: metrics + containerPort: 8080 + - name: metrics-self + containerPort: 8081 + resources: + requests: + cpu: 10m + memory: 50Mi + limits: + memory: 50Mi + securityContext: + allowPrivilegeEscalation: false + privileged: false + capabilities: + drop: + - all + runAsUser: 1000 + runAsGroup: 1000 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: / + port: 8081 + initialDelaySeconds: 5 + timeoutSeconds: 5 + serviceAccountName: kube-state-metrics +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + namespace: gmp-public + name: kube-state-metrics +spec: + clusterIP: None + ports: + - name: metrics + port: 8080 + targetPort: metrics + - name: metrics-self + port: 8081 + targetPort: metrics-self + selector: + 
app.kubernetes.io/name: kube-state-metrics +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: gmp-public + name: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: gmp-public:kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: gmp-public:kube-state-metrics +subjects: +- kind: ServiceAccount + namespace: gmp-public + name: kube-state-metrics +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: gmp-public:kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: + - list + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - get +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + - ingresses + verbs: + - list + - watch +- apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - get +- apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - list + - watch +- apiGroups: + - 
certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - list + - watch +- apiGroups: + - storage.k8s.io + resources: + - storageclasses + - volumeattachments + verbs: + - list + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + - ingresses + verbs: + - list + - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - list + - watch +--- +# autoscaling/v2 is generally available as of Kubernetes 1.23, so this manifest +# no longer needs the older autoscaling/v2beta2 API. +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: kube-state-metrics + namespace: gmp-public +spec: + maxReplicas: 10 + minReplicas: 1 + scaleTargetRef: + apiVersion: apps/v1 + kind: StatefulSet + name: kube-state-metrics + metrics: + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 60 + behavior: + scaleDown: + policies: + - type: Pods + value: 1 + # Under-utilization needs to persist for `periodSeconds` before any action can be taken. + # Current supported max from https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/horizontal-pod-autoscaler-v2/. + periodSeconds: 1800 + # Current supported max from https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/horizontal-pod-autoscaler-v2/. 
+ stabilizationWindowSeconds: 3600 +--- +apiVersion: monitoring.googleapis.com/v1 +kind: ClusterPodMonitoring +metadata: + name: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: google-cloud-managed-prometheus +spec: + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + endpoints: + - port: metrics + interval: 30s + metricRelabeling: + - action: keep + regex: kube_(daemonset|deployment|pod|namespace|node|statefulset)_.+ + sourceLabels: [__name__] + targetLabels: + metadata: [] # explicitly empty so the metric labels are respected +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + namespace: gmp-public + name: kube-state-metrics + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: google-cloud-managed-prometheus +spec: + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + endpoints: + - port: metrics-self + interval: 30s \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/kustomization.yaml.j2 b/blueprints/gke/autopilot/bundle/monitoring/kustomization.yaml.j2 new file mode 100755 index 00000000..8ac7957b --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/kustomization.yaml.j2 @@ -0,0 +1,72 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +resources: + - namespace.yaml + - frontend.yaml + - grafana.yaml + - ingress.yaml + - custom-stackdriver-metrics-adapter.yaml + - kube-state-metrics.yaml +configMapGenerator: +- name: k8s-grafana-dashboards + namespace: monitoring + options: + disableNameSuffixHash: true + files: + - dashboards/k8s-global.json + - dashboards/k8s-namespaces.json + - dashboards/k8s-nodes.json + - dashboards/k8s-pods.json +- name: locust-grafana-dashboards + namespace: monitoring + options: + disableNameSuffixHash: true + files: + - dashboards/locust.json +- name: nginx-grafana-dashboards + namespace: monitoring + options: + disableNameSuffixHash: true + files: + - dashboards/nginx.json +patches: + - target: + version: v1 + kind: ServiceAccount + name: frontend + namespace: monitoring + patch: |- + - op: add + path: /metadata/annotations/iam.gke.io~1gcp-service-account + value: sa-monitoring@{{ project_id }}.iam.gserviceaccount.com + - target: + version: v1 + kind: ServiceAccount + name: custom-metrics-stackdriver-adapter + namespace: monitoring + patch: |- + - op: add + path: /metadata/annotations/iam.gke.io~1gcp-service-account + value: sa-monitoring@{{ project_id }}.iam.gserviceaccount.com + - target: + group: apps + version: v1 + kind: Deployment + name: frontend + namespace: monitoring + patch: |- + - op: add + path: /spec/template/spec/containers/0/args/- + value: "--query.project-id={{ project_id }}" \ No newline at end of file diff --git a/blueprints/gke/autopilot/bundle/monitoring/namespace.yaml b/blueprints/gke/autopilot/bundle/monitoring/namespace.yaml new file mode 100644 index 00000000..38e4ef69 --- /dev/null +++ b/blueprints/gke/autopilot/bundle/monitoring/namespace.yaml @@ -0,0 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring \ No newline at end of file diff --git a/blueprints/gke/autopilot/cluster.tf b/blueprints/gke/autopilot/cluster.tf new file mode 100644 index 00000000..2ded1f63 --- /dev/null +++ b/blueprints/gke/autopilot/cluster.tf @@ -0,0 +1,54 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +module "cluster" { + source = "../../../modules/gke-cluster" + project_id = module.project.project_id + name = "cluster" + location = var.region + vpc_config = { + network = module.vpc.self_link + subnetwork = module.vpc.subnet_self_links["${var.region}/subnet-cluster"] + secondary_range_names = { + pods = "pods" + services = "services" + } + master_authorized_ranges = var.cluster_network_config.master_authorized_cidr_blocks + master_ipv4_cidr_block = var.cluster_network_config.master_cidr_block + } + enable_features = { + autopilot = true + } + monitoring_config = { + enable_components = ["SYSTEM_COMPONENTS"] + managed_prometheus = true + } + cluster_autoscaling = { + auto_provisioning_defaults = { + service_account = module.node_sa.email + } + } + release_channel = "RAPID" + depends_on = [ + module.project + ] +} + +module "node_sa" { + source = "../../../modules/iam-service-account" + project_id = module.project.project_id + name = "sa-node" +} \ No newline at end of file diff --git a/blueprints/gke/autopilot/diagram.png b/blueprints/gke/autopilot/diagram.png new file mode 100644 index 00000000..121f115a Binary files /dev/null and b/blueprints/gke/autopilot/diagram.png differ diff --git a/blueprints/gke/autopilot/glbs.tf b/blueprints/gke/autopilot/glbs.tf new file mode 100644 index 00000000..39897c43 --- /dev/null +++ b/blueprints/gke/autopilot/glbs.tf @@ -0,0 +1,25 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + urls = { for k, v in module.addresses.global_addresses : k => "http://${v.address}" } +} + +module "addresses" { + source = "../../../modules/net-address" + project_id = module.project.project_id + global_addresses = ["grafana", "locust", "app"] +} diff --git a/blueprints/gke/autopilot/main.tf b/blueprints/gke/autopilot/main.tf new file mode 100644 index 00000000..9856dfaa --- /dev/null +++ b/blueprints/gke/autopilot/main.tf @@ -0,0 +1,65 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module "project" { + source = "../../../modules/project" + billing_account = (var.project_create != null + ? var.project_create.billing_account_id + : null + ) + parent = (var.project_create != null + ? 
var.project_create.parent + : null + ) + project_create = var.project_create != null + name = var.project_id + services = [ + "artifactregistry.googleapis.com", + "cloudbuild.googleapis.com", + "container.googleapis.com" + ] + iam = { + "roles/monitoring.viewer" = [module.monitoring_sa.iam_email] + "roles/container.nodeServiceAccount" = [module.node_sa.iam_email] + "roles/container.admin" = [module.mgmt_server.service_account_iam_email] + "roles/storage.admin" = [module.mgmt_server.service_account_iam_email] + "roles/cloudbuild.builds.editor" = [module.mgmt_server.service_account_iam_email] + "roles/viewer" = [module.mgmt_server.service_account_iam_email] + } +} + +module "monitoring_sa" { + source = "../../../modules/iam-service-account" + project_id = module.project.project_id + name = "sa-monitoring" + iam = { + "roles/iam.workloadIdentityUser" = [ + "serviceAccount:${module.cluster.workload_identity_pool}[monitoring/frontend]", + "serviceAccount:${module.cluster.workload_identity_pool}[monitoring/custom-metrics-stackdriver-adapter]" + ] + } +} + +module "docker_artifact_registry" { + source = "../../../modules/artifact-registry" + project_id = module.project.project_id + location = var.region + format = "DOCKER" + id = "registry" + iam = { + "roles/artifactregistry.reader" = [module.node_sa.iam_email] + } +} diff --git a/blueprints/gke/autopilot/mgmt.tf b/blueprints/gke/autopilot/mgmt.tf new file mode 100644 index 00000000..81b7d1db --- /dev/null +++ b/blueprints/gke/autopilot/mgmt.tf @@ -0,0 +1,39 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + zone = "${var.region}-b" +} + +module "mgmt_server" { + source = "../../../modules/compute-vm" + project_id = module.project.project_id + zone = local.zone + name = "mgmt" + instance_type = var.mgmt_server_config.instance_type + network_interfaces = [{ + network = module.vpc.self_link + subnetwork = module.vpc.subnet_self_links["${var.region}/subnet-mgmt"] + nat = false + addresses = null + }] + service_account_create = true + boot_disk = { + image = var.mgmt_server_config.image + type = var.mgmt_server_config.disk_type + size = var.mgmt_server_config.disk_size + } +} \ No newline at end of file diff --git a/blueprints/gke/autopilot/outputs.tf b/blueprints/gke/autopilot/outputs.tf new file mode 100644 index 00000000..db526d41 --- /dev/null +++ b/blueprints/gke/autopilot/outputs.tf @@ -0,0 +1,20 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +output "urls" { + description = "Grafanam, locust and application URLs." 
+ value = local.urls +} \ No newline at end of file diff --git a/blueprints/gke/autopilot/templates/gssh.sh.tpl b/blueprints/gke/autopilot/templates/gssh.sh.tpl new file mode 100644 index 00000000..b366231d --- /dev/null +++ b/blueprints/gke/autopilot/templates/gssh.sh.tpl @@ -0,0 +1,30 @@ +#!/bin/bash +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +host="$${@: -2: 1}" +cmd="$${@: -1: 1}" + +gcloud_args=" +--tunnel-through-iap +--zone=${zone} +--project=${project_id} +--quiet +--no-user-output-enabled +-- +-C +" + +exec gcloud compute ssh "$host" $gcloud_args "$cmd" \ No newline at end of file diff --git a/blueprints/gke/autopilot/variables.tf b/blueprints/gke/autopilot/variables.tf new file mode 100644 index 00000000..785cb48e --- /dev/null +++ b/blueprints/gke/autopilot/variables.tf @@ -0,0 +1,84 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +variable "cluster_network_config" { + description = "Cluster network configuration." + type = object({ + nodes_cidr_block = string + pods_cidr_block = string + services_cidr_block = string + master_authorized_cidr_blocks = map(string) + master_cidr_block = string + }) + default = { + nodes_cidr_block = "10.0.1.0/24" + pods_cidr_block = "172.16.0.0/20" + services_cidr_block = "192.168.0.0/24" + master_authorized_cidr_blocks = { + internal = "10.0.0.0/8" + } + master_cidr_block = "10.0.0.0/28" + } +} + +variable "mgmt_server_config" { + description = "Management server configuration." + type = object({ + disk_size = number + disk_type = string + image = string + instance_type = string + }) + default = { + disk_size = 50 + disk_type = "pd-ssd" + image = "projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts" + instance_type = "n1-standard-2" + } +} + +variable "mgmt_subnet_cidr_block" { + description = "Management subnet IP CIDR range." + type = string + default = "10.0.2.0/24" +} + +variable "network" { + description = "VPC name." + type = string + default = "vpc" + nullable = false +} + +variable "project_create" { + description = "Parameters for the creation of the new project." + type = object({ + billing_account_id = string + parent = string + }) + default = null +} + +variable "project_id" { + description = "Project ID." + type = string +} + +variable "region" { + description = "Region." + type = string + default = "europe-west1" +} \ No newline at end of file diff --git a/blueprints/gke/autopilot/vpc.tf b/blueprints/gke/autopilot/vpc.tf new file mode 100644 index 00000000..c7cc9526 --- /dev/null +++ b/blueprints/gke/autopilot/vpc.tf @@ -0,0 +1,46 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module "vpc" { + source = "../../../modules/net-vpc" + project_id = module.project.project_id + name = var.network + vpc_create = (var.project_create != null) + subnets = [ + { + ip_cidr_range = var.mgmt_subnet_cidr_block + name = "subnet-mgmt" + region = var.region + }, + { + ip_cidr_range = var.cluster_network_config.nodes_cidr_block + name = "subnet-cluster" + region = var.region + secondary_ip_ranges = { + pods = var.cluster_network_config.pods_cidr_block + services = var.cluster_network_config.services_cidr_block + } + } + ] +} + +module "nat" { + source = "../../../modules/net-cloudnat" + project_id = module.project.project_id + region = var.region + name = "nat" + router_network = module.vpc.name +} diff --git a/fast/stages/0-bootstrap/organization.tf b/fast/stages/0-bootstrap/organization.tf index d75a25f2..e94841f7 100644 --- a/fast/stages/0-bootstrap/organization.tf +++ b/fast/stages/0-bootstrap/organization.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ locals { # organization authoritative IAM bindings, in an easy to edit format before # they are combined with var.iam a bit further in locals _iam = { + "roles/billing.creator" = [] "roles/browser" = [ "domain:${var.organization.domain}" ] diff --git a/fast/stages/FAQ.md b/fast/stages/FAQ.md index 5245c8a9..6c0fceec 100644 --- a/fast/stages/FAQ.md +++ b/fast/stages/FAQ.md @@ -11,3 +11,15 @@ - **How can I fix permission issues when running Terraform apply?** - Make sure your account is part of the organization admin group defined in variables. - Make sure you have configured [application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials), rerun `gcloud auth login --update-adc` to fix them. +- **My GCP Org is not empty, what is the best way to save existing work and still install Fast?** + - Background: Fast needs to be installed on the org level - because of many things that one can do only on that level, like the org policy role, secure tags, org policies + - Create a folder, you can call it "Legacy" + - Move all the existing projects and folders into this folder (you can do it by selecting all of them at once on the [resource management page](https://console.cloud.google.com/cloud-resource-manager) of the GCP console) + - Collect the existing defined org policies and save them: + ``` + gcloud organizations list + export FAST_ORG_ID=123456 + for c in $(gcloud org-policies list --organization $FAST_ORG_ID --format='get(constraint)'); do gcloud org-policies describe --organization $FAST_ORG_ID $c ; echo '---' ; done > previous_policies.yaml + ``` + - Analyze the policies together with [the ones Fast applies](1-resman/data/org-policies) and apply the ones that still make sense on that "Legacy" folder level + - Proceed with installing Fast normally diff --git a/modules/cloud-config-container/simple-nva/README.md b/modules/cloud-config-container/simple-nva/README.md index f70842e8..e0800d17 100644 --- 
a/modules/cloud-config-container/simple-nva/README.md +++ b/modules/cloud-config-container/simple-nva/README.md @@ -62,16 +62,88 @@ module "vm" { } # tftest modules=1 resources=1 ``` + +### Example with advanced routing capabilities + +Find below a sample Terraform example for bootstrapping a simple NVA powered by [COS](https://cloud.google.com/container-optimized-os/docs) and running an [FRRouting](https://frrouting.org/) container. +Please find below a sample frr.conf file based on the documentation available [here](https://docs.frrouting.org/en/latest/basic.html) for hosting a BGP service with ASN 65001 on an FRR container, establishing a BGP session with a remote neighbor with IP address 10.128.0.2 and ASN 65002. + +``` +# tftest-file id=frr_conf path=./frr.conf +# Example frr.conf file + +log syslog informational +no ipv6 forwarding +router bgp 65001 + neighbor 10.128.0.2 remote-as 65002 +line vty +``` + +The following code assumes a file named frr.conf exists in the same folder. + +```hcl +locals { + network_interfaces = [ + { + addresses = null + name = "dev" + nat = false + network = "dev_vpc_self_link" + routes = ["10.128.0.0/9"] + subnetwork = "dev_vpc_nva_subnet_self_link" + enable_masquerading = true + non_masq_cidrs = ["10.0.0.0/8"] + }, + { + addresses = null + name = "prod" + nat = false + network = "prod_vpc_self_link" + routes = ["10.0.0.0/9"] + subnetwork = "prod_vpc_nva_subnet_self_link" + } + ] +} + +module "cos-nva" { + source = "./fabric/modules/cloud-config-container/simple-nva" + enable_health_checks = true + network_interfaces = local.network_interfaces + frr_config = { config_file = "./frr.conf", daemons_enabled = ["bgpd"] } + optional_run_cmds = ["ls -l"] +} + +module "vm" { + source = "./fabric/modules/compute-vm" + project_id = "my-project" + zone = "europe-west8-b" + name = "cos-nva" + network_interfaces = local.network_interfaces + metadata = { + user-data = module.cos-nva.cloud_config + google-logging-enabled = true + } + boot_disk = { + image 
= "projects/cos-cloud/global/images/family/cos-stable" + type = "pd-ssd" + size = 10 + } + tags = ["nva", "ssh"] +} +# tftest modules=1 resources=1 files=frr_conf +``` ## Variables | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| [network_interfaces](variables.tf#L39) | Network interfaces configuration. | list(object({…})) | ✓ | | +| [network_interfaces](variables.tf#L75) | Network interfaces configuration. | list(object({…})) | ✓ | | | [cloud_config](variables.tf#L17) | Cloud config template path. If null default will be used. | string | | null | | [enable_health_checks](variables.tf#L23) | Configures routing to enable responses to health check probes. | bool | | false | | [files](variables.tf#L29) | Map of extra files to create on the instance, path as key. Owner and permissions will use defaults if null. | map(object({…})) | | {} | +| [frr_config](variables.tf#L39) | FRR configuration for container running on the NVA. | object({…}) | | null | +| [optional_run_cmds](variables.tf#L84) | Optional Cloud Init run commands to execute. | list(string) | | [] | ## Outputs diff --git a/modules/cloud-config-container/simple-nva/cloud-config.yaml b/modules/cloud-config-container/simple-nva/cloud-config.yaml index f1d71e82..f44cd08e 100644 --- a/modules/cloud-config-container/simple-nva/cloud-config.yaml +++ b/modules/cloud-config-container/simple-nva/cloud-config.yaml @@ -1,6 +1,6 @@ #cloud-config -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ write_files: content: | ${indent(6, data.content)} %{ endfor } + - path: /etc/systemd/system/routing.service permissions: 0644 owner: root @@ -34,6 +35,7 @@ write_files: Wants=network-online.target [Service] ExecStart=/bin/sh -c "/var/run/nva/start-routing.sh" + - path: /var/run/nva/start-routing.sh permissions: 0744 owner: root @@ -43,6 +45,12 @@ write_files: %{ if enable_health_checks ~} /var/run/nva/policy_based_routing.sh ${interface.name} %{ endif ~} +%{ if interface.enable_masquerading ~} +%{ for cidr in interface.non_masq_cidrs ~} + iptables -t nat -A POSTROUTING -o ${interface.name} -d ${cidr} -j ACCEPT +%{ endfor ~} + iptables -t nat -A POSTROUTING -o ${interface.name} -j MASQUERADE +%{ endif ~} %{ for route in interface.routes ~} ip route add ${route} via `curl http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/${interface.number}/gateway -H "Metadata-Flavor:Google"` dev ${interface.name} %{ endfor ~} @@ -55,4 +63,6 @@ runcmd: - systemctl daemon-reload - systemctl enable routing - systemctl start routing - +%{ for cmd in optional_run_cmds ~} + - ${cmd} +%{ endfor ~} diff --git a/modules/cloud-config-container/simple-nva/files/frr/daemons b/modules/cloud-config-container/simple-nva/files/frr/daemons new file mode 100644 index 00000000..0a388df0 --- /dev/null +++ b/modules/cloud-config-container/simple-nva/files/frr/daemons @@ -0,0 +1,65 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +zebra=${zebra_enabled} +bgpd=${bgpd_enabled} +ospfd=${ospfd_enabled} +ospf6d=${ospf6d_enabled} +ripd=${ripd_enabled} +ripngd=${ripngd_enabled} +isisd=${isisd_enabled} +pimd=${pimd_enabled} +ldpd=${ldpd_enabled} +nhrpd=${nhrpd_enabled} +eigrpd=${eigrpd_enabled} +babeld=${babeld_enabled} +sharpd=${sharpd_enabled} +staticd=${staticd_enabled} +pbrd=${pbrd_enabled} +bfdd=${bfdd_enabled} +fabricd=${fabricd_enabled} + +# If this option is set the /etc/init.d/frr script automatically loads +# the config via "vtysh -b" when the servers are started. +# Check /etc/pam.d/frr if you intend to use "vtysh"! + +vtysh_enable=yes +zebra_options=" -A 127.0.0.1 -s 90000000" +bgpd_options=" -A 127.0.0.1" +ospfd_options=" --daemon -A 127.0.0.1" +ospf6d_options=" --daemon -A ::1" +ripd_options=" --daemon -A 127.0.0.1" +ripngd_options=" --daemon -A ::1" +isisd_options=" --daemon -A 127.0.0.1" +pimd_options=" --daemon -A 127.0.0.1" +ldpd_options=" --daemon -A 127.0.0.1" +nhrpd_options=" --daemon -A 127.0.0.1" +eigrpd_options=" --daemon -A 127.0.0.1" +babeld_options=" --daemon -A 127.0.0.1" +sharpd_options=" --daemon -A 127.0.0.1" +staticd_options=" --daemon -A 127.0.0.1" +pbrd_options=" --daemon -A 127.0.0.1" +bfdd_options=" --daemon -A 127.0.0.1" +fabricd_options=" --daemon -A 127.0.0.1" + +#MAX_FDS=1024 +# The list of daemons to watch is automatically generated by the init script. +#watchfrr_options="" + +# for debugging purposes, you can specify a "wrap" command to start instead +# of starting the daemon directly, e.g. to use valgrind on ospfd: +# ospfd_wrap="/usr/bin/valgrind" +# or you can use "all_wrap" for all daemons, e.g. to use perf record: +# all_wrap="/usr/bin/perf record --call-graph -" +# the normal daemon command is added to this at the end. 
diff --git a/modules/cloud-config-container/simple-nva/files/frr/frr.service b/modules/cloud-config-container/simple-nva/files/frr/frr.service new file mode 100644 index 00000000..a560602e --- /dev/null +++ b/modules/cloud-config-container/simple-nva/files/frr/frr.service @@ -0,0 +1,27 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[Unit] +Description=Start FRR container +After=gcr-online.target docker.socket +Wants=gcr-online.target docker.socket docker-events-collector.service +[Service] +Environment="HOME=/home/frr" +ExecStart=/usr/bin/docker run --rm --name=frr \ +--privileged \ +--network host \ +-v /etc/frr:/etc/frr \ +frrouting/frr +ExecStop=/usr/bin/docker stop frr +ExecStopPost=/usr/bin/docker rm frr diff --git a/modules/cloud-config-container/simple-nva/files/ipprefix_by_netmask.sh b/modules/cloud-config-container/simple-nva/files/ipprefix_by_netmask.sh index a1c69822..16943825 100644 --- a/modules/cloud-config-container/simple-nva/files/ipprefix_by_netmask.sh +++ b/modules/cloud-config-container/simple-nva/files/ipprefix_by_netmask.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/modules/cloud-config-container/simple-nva/files/policy_based_routing.sh b/modules/cloud-config-container/simple-nva/files/policy_based_routing.sh index 951396d3..49f38288 100644 --- a/modules/cloud-config-container/simple-nva/files/policy_based_routing.sh +++ b/modules/cloud-config-container/simple-nva/files/policy_based_routing.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/modules/cloud-config-container/simple-nva/main.tf b/modules/cloud-config-container/simple-nva/main.tf index 4ff0afe2..110983bf 100644 --- a/modules/cloud-config-container/simple-nva/main.tf +++ b/modules/cloud-config-container/simple-nva/main.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,42 +15,109 @@ */ locals { - cloud_config = templatefile(local.template, merge({ - files = local.files - enable_health_checks = var.enable_health_checks - network_interfaces = local.network_interfaces - })) + _files = merge( + { + "/var/run/nva/ipprefix_by_netmask.sh" = { + content = file("${path.module}/files/ipprefix_by_netmask.sh") + owner = "root" + permissions = "0744" + } + "/var/run/nva/policy_based_routing.sh" = { + content = file("${path.module}/files/policy_based_routing.sh") + owner = "root" + permissions = "0744" + } + }, { + for path, attrs in var.files : path => { + content = attrs.content, + owner = attrs.owner, + permissions = attrs.permissions + } + }, + try(var.frr_config != null, false) ? 
{ + "/etc/frr/daemons" = { + content = templatefile("${path.module}/files/frr/daemons", local._frr_daemons_enabled) + owner = "root" + permissions = "0744" + } + "/etc/frr/frr.conf" = { + content = file(var.frr_config.config_file) + owner = "root" + permissions = "0744" + } + "/etc/systemd/system/frr.service" = { + content = file("${path.module}/files/frr/frr.service") + owner = "root" + permissions = "0644" + } + "/var/lib/docker/daemon.json" = { + content = < { - content = attrs.content, - owner = attrs.owner, - permissions = attrs.permissions - } - }) + _frr_daemons = [ + "zebra", + "bgpd", + "ospfd", + "ospf6d", + "ripd", + "ripngd", + "isisd", + "pimd", + "ldpd", + "nhrpd", + "eigrpd", + "babeld", + "sharpd", + "staticd", + "pbrd", + "bfdd", + "fabricd" + ] - network_interfaces = [ + _frr_daemons_enabled = try( + { + for daemon in local._frr_daemons : + "${daemon}_enabled" => contains(var.frr_config.daemons_enabled, daemon) ? "yes" : "no" + }, {}) + + _network_interfaces = [ for index, interface in var.network_interfaces : { - name = "eth${index}" - number = index - routes = interface.routes + name = "eth${index}" + number = index + routes = interface.routes + enable_masquerading = interface.enable_masquerading != null ? interface.enable_masquerading : false + non_masq_cidrs = interface.non_masq_cidrs != null ? interface.non_masq_cidrs : [] } ] - template = ( + _optional_run_cmds = ( + try(var.frr_config != null, false) + ? concat(["systemctl start frr"], var.optional_run_cmds) + : var.optional_run_cmds + ) + + _template = ( var.cloud_config == null ? 
"${path.module}/cloud-config.yaml" : var.cloud_config ) + + cloud_config = templatefile(local._template, { + enable_health_checks = var.enable_health_checks + files = local._files + network_interfaces = local._network_interfaces + optional_run_cmds = local._optional_run_cmds + }) } diff --git a/modules/cloud-config-container/simple-nva/outputs.tf b/modules/cloud-config-container/simple-nva/outputs.tf index 7d8d4165..54942c1a 100644 --- a/modules/cloud-config-container/simple-nva/outputs.tf +++ b/modules/cloud-config-container/simple-nva/outputs.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/modules/cloud-config-container/simple-nva/variables.tf b/modules/cloud-config-container/simple-nva/variables.tf index 39d96d91..84f62f69 100644 --- a/modules/cloud-config-container/simple-nva/variables.tf +++ b/modules/cloud-config-container/simple-nva/variables.tf @@ -1,5 +1,5 @@ /** - * Copyright 2022 Google LLC + * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,9 +36,53 @@ variable "files" { default = {} } +variable "frr_config" { + description = "FRR configuration for container running on the NVA." + type = object({ + daemons_enabled = optional(list(string)) + config_file = string + }) + default = null + validation { + condition = try(alltrue([ + for daemon in var.frr_config.daemons_enabled : contains([ + "zebra", + "bgpd", + "ospfd", + "ospf6d", + "ripd", + "ripngd", + "isisd", + "pimd", + "ldpd", + "nhrpd", + "eigrpd", + "babeld", + "sharpd", + "staticd", + "pbrd", + "bfdd", + "fabricd" + ], daemon) + ]), true) + error_message = <