Merge branch 'master' into ncc

This commit is contained in:
Ludovico Magnocavallo 2023-03-08 20:33:53 +01:00 committed by GitHub
commit 5489162b75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
61 changed files with 2606 additions and 62 deletions

5
.gitignore vendored
View File

@ -49,3 +49,8 @@ blueprints/apigee/hybrid-gke/apiproxy.zip
blueprints/apigee/hybrid-gke/deploy-apiproxy.sh
blueprints/apigee/hybrid-gke/ansible/gssh.sh
blueprints/apigee/hybrid-gke/ansible/vars/vars.yaml
blueprints/gke/autopilot/ansible/gssh.sh
blueprints/gke/autopilot/ansible/vars/vars.yaml
blueprints/gke/autopilot/bundle/monitoring/kustomization.yaml
blueprints/gke/autopilot/bundle/locust/kustomization.yaml
blueprints/gke/autopilot/bundle.tar.gz

View File

@ -8,7 +8,7 @@ Currently available blueprints:
- **cloud operations** - [Active Directory Federation Services](./cloud-operations/adfs), [Cloud Asset Inventory feeds for resource change tracking and remediation](./cloud-operations/asset-inventory-feed-remediation), [Fine-grained Cloud DNS IAM via Service Directory](./cloud-operations/dns-fine-grained-iam), [Cloud DNS & Shared VPC design](./cloud-operations/dns-shared-vpc), [Delegated Role Grants](./cloud-operations/iam-delegated-role-grants), [Networking Dashboard](./cloud-operations/network-dashboard), [Managing on-prem service account keys by uploading public keys](./cloud-operations/onprem-sa-key-management), [Compute Image builder with Hashicorp Packer](./cloud-operations/packer-image-builder), [Packer example](./cloud-operations/packer-image-builder/packer), [Compute Engine quota monitoring](./cloud-operations/quota-monitoring), [Scheduled Cloud Asset Inventory Export to Bigquery](./cloud-operations/scheduled-asset-inventory-export-bq), [Configuring workload identity federation with Terraform Cloud/Enterprise workflows](./cloud-operations/terraform-cloud-dynamic-credentials), [TCP healthcheck and restart for unmanaged GCE instances](./cloud-operations/unmanaged-instances-healthcheck), [Migrate for Compute Engine (v5) blueprints](./cloud-operations/vm-migration), [Configuring workload identity federation to access Google Cloud resources from apps running on Azure](./cloud-operations/workload-identity-federation)
- **data solutions** - [GCE and GCS CMEK via centralized Cloud KMS](./data-solutions/cmek-via-centralized-kms), [Cloud Composer version 2 private instance, supporting Shared VPC and external CMEK key](./data-solutions/composer-2), [Cloud SQL instance with multi-region read replicas](./data-solutions/cloudsql-multiregion), [Data Platform](./data-solutions/data-platform-foundations), [Spinning up a foundation data pipeline on Google Cloud using Cloud Storage, Dataflow and BigQuery](./data-solutions/gcs-to-bq-with-least-privileges), [#SQL Server Always On Groups blueprint](./data-solutions/sqlserver-alwayson), [Data Playground](./data-solutions/data-playground), [MLOps with Vertex AI](./data-solutions/vertex-mlops), [Shielded Folder](./data-solutions/shielded-folder), [BigQuery ML and Vertex AI Pipeline](./data-solutions/bq-ml)
- **factories** - [The why and the how of Resource Factories](./factories), [Google Cloud Identity Group Factory](./factories/cloud-identity-group-factory), [Google Cloud BQ Factory](./factories/bigquery-factory), [Google Cloud VPC Firewall Factory](./factories/net-vpc-firewall-yaml), [Minimal Project Factory](./factories/project-factory)
- **GKE** - [Binary Authorization Pipeline Blueprint](./gke/binauthz), [Storage API](./gke/binauthz/image), [Multi-cluster mesh on GKE (fleet API)](./gke/multi-cluster-mesh-gke-fleet-api), [GKE Multitenant Blueprint](./gke/multitenant-fleet), [Shared VPC with GKE support](./networking/shared-vpc-gke/)
- **GKE** - [Binary Authorization Pipeline Blueprint](./gke/binauthz), [Storage API](./gke/binauthz/image), [Multi-cluster mesh on GKE (fleet API)](./gke/multi-cluster-mesh-gke-fleet-api), [GKE Multitenant Blueprint](./gke/multitenant-fleet), [Shared VPC with GKE support](./networking/shared-vpc-gke/), [GKE Autopilot](./gke/autopilot)
- **networking** - [Calling a private Cloud Function from On-premises](./networking/private-cloud-function-from-onprem), [Decentralized firewall management](./networking/decentralized-firewall), [Decentralized firewall validator](./networking/decentralized-firewall/validator), [Network filtering with Squid](./networking/filtering-proxy), [GLB and multi-regional daisy-chaining through hybrid NEGs](./networking/glb-hybrid-neg-internal), [Hybrid connectivity to on-premise services through PSC](./networking/psc-hybrid), [HTTP Load Balancer with Cloud Armor](./networking/glb-and-armor), [Hub and Spoke via VPN](./networking/hub-and-spoke-vpn), [Hub and Spoke via VPC Peering](./networking/hub-and-spoke-peering), [Internal Load Balancer as Next Hop](./networking/ilb-next-hop), [Network filtering with Squid with isolated VPCs using Private Service Connect](./networking/filtering-proxy-psc), On-prem DNS and Google Private Access, [PSC Producer](./networking/psc-hybrid/psc-producer), [PSC Consumer](./networking/psc-hybrid/psc-consumer), [Shared VPC with optional GKE cluster](./networking/shared-vpc-gke)
- **serverless** - [Creating multi-region deployments for API Gateway](./serverless/api-gateway), [Cloud Run series](./serverless/cloud-run-explore)
- **third party solutions** - [OpenShift on GCP user-provisioned infrastructure](./third-party-solutions/openshift), [Wordpress deployment on Cloud Run](./third-party-solutions/wordpress/cloudrun)

View File

@ -98,5 +98,5 @@ module "test" {
prefix = "prefix"
}
# tftest modules=9 resources=46
# tftest modules=9 resources=47
```

View File

@ -73,15 +73,24 @@
"metadata": {},
"outputs": [],
"source": [
"# Set your variables\n",
"PREFIX = 'your-prefix'\n",
"PROJECT_ID = 'your-project-id'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"DATASET = \"{}_data\".format(PREFIX.replace(\"-\",\"_\")) \n",
"EXPERIMENT_NAME = 'bqml-experiment'\n",
"ENDPOINT_DISPLAY_NAME = 'bqml-endpoint'\n",
"DATASET = \"{}_data\".format(PREFIX.replace(\"-\",\"_\")) \n",
"LOCATION = 'US'\n",
"MODEL_NAME = 'bqml-model'\n",
"PIPELINE_NAME = 'bqml-vertex-pipeline'\n",
"PIPELINE_ROOT = f\"gs://{PREFIX}-data\"\n",
"PREFIX = 'your-prefix'\n",
"PROJECT_ID = 'your-project-id'\n",
"REGION = 'us-central1'\n",
"SERVICE_ACCOUNT = f\"vertex-sa@{PROJECT_ID}.iam.gserviceaccount.com\""
]
@ -227,7 +236,6 @@
" project=project_id,\n",
" location=location,\n",
" query=features_query.format(dataset=dataset, project_id=project_id),\n",
" #job_configuration_query = {\"writeDisposition\": \"WRITE_TRUNCATE\"} #, \"destinationTable\":{\"projectId\":project_id,\"datasetId\":dataset,\"tableId\":\"ecommerce_abt_table\"}} #{\"destinationTable\":{\"projectId\":\"project_id\",\"datasetId\":dataset,\"tableId\":\"ecommerce_abt_table\"}}, #\"writeDisposition\": \"WRITE_TRUNCATE\", \n",
"\n",
" ).after(create_dataset)\n",
"\n",
@ -408,9 +416,6 @@
"source": [
"# batch prediction on BigQuery\n",
"\n",
"with open(\"sql/explain_predict.sql\") as file:\n",
" explain_predict_query = file.read()\n",
"\n",
"client = bigquery_client = bigquery.Client(location=LOCATION, project=PROJECT_ID)\n",
"batch_predictions = bigquery_client.query(\n",
" explain_predict_query.format(\n",

View File

@ -17,30 +17,35 @@ This sample creates several distinct groups of resources:
- One BigQuery dataset
## Virtual Private Cloud (VPC) design
As is often the case in real-world configurations, this blueprint accepts as input an existing Shared-VPC via the network_config variable. Make sure that 'container.googleapis.com', 'notebooks.googleapis.com' and 'servicenetworking.googleapis.com' are enabled in the VPC host project.
If the network_config variable is not provided, one VPC will be created in each project that supports network resources (load, transformation and orchestration).
## Deploy your environment
We assume the identity running the following steps has the following roles:
- resourcemanager.projectCreator in case a new project will be created.
- owner on the project in case you use an existing project.
Run Terraform init:
```
$ terraform init
terraform init
```
Configure the Terraform variables in your terraform.tfvars file. You need to specify at least the following variables:
```
prefix = "prefix"
project_id = "data-001"
```
You can run now:
```
$ terraform apply
terraform apply
```
You can now connect to the Vertex AI notebook to perform your data analysis.
@ -81,5 +86,5 @@ module "test" {
parent = "folders/467898377"
}
}
# tftest modules=8 resources=39
# tftest modules=8 resources=40
```

View File

@ -1,20 +1,23 @@
# MLOps with Vertex AI
## Introduction
This example implements the infrastructure required to deploy an end-to-end [MLOps process](https://services.google.com/fh/files/misc/practitioners_guide_to_mlops_whitepaper.pdf) using [Vertex AI](https://cloud.google.com/vertex-ai) platform.
## GCP resources
This example implements the infrastructure required to deploy an end-to-end [MLOps process](https://services.google.com/fh/files/misc/practitioners_guide_to_mlops_whitepaper.pdf) using [Vertex AI](https://cloud.google.com/vertex-ai) platform.
## GCP resources
The blueprint will deploy all the required resources to have a fully functional MLOPs environment containing:
- Vertex Workbench (for the experimentation environment)
- GCP Project (optional) to host all the resources
- Isolated VPC network and a subnet to be used by Vertex and Dataflow. Alternatively, an external Shared VPC can be configured using the `network_config`variable.
- Isolated VPC network and a subnet to be used by Vertex and Dataflow. Alternatively, an external Shared VPC can be configured using the `network_config` variable.
- Firewall rule to allow the internal subnet communication required by Dataflow
- Cloud NAT required to reach the internet from the different computing resources (Vertex and Dataflow)
- GCS buckets to host Vertex AI and Cloud Build Artifacts. By default the buckets will be regional and should match the Vertex AI region for the different resources (i.e. Vertex Managed Dataset) and processes (i.e. Vertex training)
- BigQuery Dataset where the training data will be stored. This is optional, since the training data could be already hosted in an existing BigQuery dataset.
- Artifact Registry Docker repository to host the custom images.
- Service account (`mlops-[env]@`) with the minimum permissions required by Vertex AI and Dataflow (if this service is used inside of the Vertex AI Pipeline).
- Service account (`github@`) to be used by Workload Identity Federation, to federate Github identity (Optional).
- Service account (`github@`) to be used by Workload Identity Federation, to federate Github identity (Optional).
- Secret to store the Github SSH key to get access the CICD code repo.
![MLOps project description](./images/mlops_projects.png "MLOps project description")
@ -28,13 +31,14 @@ Assign roles relying on User groups is a way to decouple the final set of permis
We use the following groups to control access to resources:
- *Data Scientists* (gcp-ml-ds@<company.org>). They manage notebooks and create ML pipelines.
- *ML Engineers* (gcp-ml-eng@<company.org>). They manage the different Vertex resources.
- *ML Viewer* (gcp-ml-eng@<company.org>). Group with viewer permissions for the different resources.
- *ML Engineers* (gcp-ml-eng@<company.org>). They manage the different Vertex resources.
- *ML Viewer* (gcp-ml-eng@<company.org>). Group with viewer permissions for the different resources.
Please note that these groups are not suitable for production grade environments. Roles can be customized in the `main.tf` file.
## Instructions
### Deploy the experimentation environment
## Instructions
### Deploy the experimentation environment
- Create a `terraform.tfvars` file and specify the variables to match your desired configuration. You can use the provided `terraform.tfvars.sample` as reference.
- Run `terraform init` and `terraform apply`
@ -76,6 +80,7 @@ This blueprint can be used as a building block for setting up an end2end ML Ops
<!-- END TFDOC -->
## TODO
- Add support for User Managed Notebooks, SA permission option and non default SA for Single User mode.
- Improve default naming for local VPC and Cloud NAT
@ -105,5 +110,5 @@ module "test" {
parent = "folders/111111111111"
}
}
# tftest modules=12 resources=56
# tftest modules=12 resources=57
```

View File

@ -21,6 +21,7 @@ They are meant to be used as minimal but complete starting points to create actu
### Multitenant GKE fleet
<a href="./multitenant-fleet/" title="GKE multitenant fleet"><img src="./multitenant-fleet/diagram.png" align="left" width="280px"></a> This [blueprint](./multitenant-fleet/) allows simple centralized management of similar sets of GKE clusters and their nodepools in a single project, and optional fleet management via GKE Hub templated configurations.
<br clear="left">
### Shared VPC with GKE and per-subnet support
@ -30,3 +31,9 @@ They are meant to be used as minimal but complete starting points to create actu
It is meant to be used as a starting point for most Shared VPC configurations, and to be integrated to the above blueprints where Shared VPC is needed in more complex network topologies.
<br clear="left">
### Autopilot
<a href="./autopilot" title="GKE autopilot"><img src="../networking/shared-vpc-gke/diagram.png" align="left" width="280px"></a> This [blueprint](./autopilot) creates an Autopilot cluster with Google-managed Prometheus enabled and installs an application that scales as the traffic that is hitting the load balancer exposing it grows.
<br clear="left">

View File

@ -0,0 +1,94 @@
# Load testing an application running on an autopilot cluster
This blueprint creates an Autopilot cluster with Google-managed Prometheus enabled and installs an application that scales as the traffic that is hitting the load balancer exposing it grows. It also installs the tooling required to run a distributed load test with [locust](https://locust.io) on that application and the monitoring tooling required to observe how things evolve in the cluster during the load test. Ansible is used to install the application and all the tooling on a management VM.
The diagram below depicts the architecture.
![Diagram](./diagram.png)
## Running the blueprint
1. Clone this repository or [open it in cloud shell](https://ssh.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2Fterraform-google-modules%2Fcloud-foundation-fabric&cloudshell_print=cloud-shell-readme.txt&cloudshell_working_dir=blueprints%2Fgke%2Fautopilot), then go through the following steps to create resources:
2. Initialize the terraform configuration
```
terraform init
```
3. Apply the terraform configuration
```
terraform apply -var project_id=my-project-id
```
4. Copy the IP addresses for Grafana and the Locust master.
4. Change to the ansible directory and run the following command
```
ansible-playbook -v playbook.yaml
```
5. Open the Locust master web interface URL in your browser and start the load test
6. SSH to the management VM
```
gcloud compute ssh mgmt --project my-project
```
7. Run the following command to check that the application pods are running on different nodes than the load testing and monitoring tooling.
```
kubectl get pods -A -o wide
```
8. Run the following command to see how the application pods scale
```
kubectl get hpa -n sample -w
```
9. Run the following command to see how the cluster nodes scale
```
kubectl get nodes -w
```
Alternatively you can also check all the above using the dashboards available in grafana.
<!-- BEGIN TFDOC -->
## Variables
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| [project_id](variables.tf#L75) | Project ID. | <code>string</code> | ✓ | |
| [cluster_network_config](variables.tf#L17) | Cluster network configuration. | <code title="object&#40;&#123;&#10; nodes_cidr_block &#61; string&#10; pods_cidr_block &#61; string&#10; services_cidr_block &#61; string&#10; master_authorized_cidr_blocks &#61; map&#40;string&#41;&#10; master_cidr_block &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; nodes_cidr_block &#61; &#34;10.0.1.0&#47;24&#34;&#10; pods_cidr_block &#61; &#34;172.16.0.0&#47;20&#34;&#10; services_cidr_block &#61; &#34;192.168.0.0&#47;24&#34;&#10; master_authorized_cidr_blocks &#61; &#123;&#10; internal &#61; &#34;10.0.0.0&#47;8&#34;&#10; &#125;&#10; master_cidr_block &#61; &#34;10.0.0.0&#47;28&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [mgmt_server_config](variables.tf#L37) | Management server configuration. | <code title="object&#40;&#123;&#10; disk_size &#61; number&#10; disk_type &#61; string&#10; image &#61; string&#10; instance_type &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; disk_size &#61; 50&#10; disk_type &#61; &#34;pd-ssd&#34;&#10; image &#61; &#34;projects&#47;ubuntu-os-cloud&#47;global&#47;images&#47;family&#47;ubuntu-2204-lts&#34;&#10; instance_type &#61; &#34;n1-standard-2&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [mgmt_subnet_cidr_block](variables.tf#L53) | Management subnet IP CIDR range. | <code>string</code> | | <code>&#34;10.0.2.0&#47;24&#34;</code> |
| [network](variables.tf#L59) | VPC name. | <code>string</code> | | <code>&#34;vpc&#34;</code> |
| [project_create](variables.tf#L66) | Parameters for the creation of the new project. | <code title="object&#40;&#123;&#10; billing_account_id &#61; string&#10; parent &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> |
| [region](variables.tf#L80) | Region. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
## Outputs
| name | description | sensitive |
|---|---|:---:|
| [urls](outputs.tf#L17) | Grafanam, locust and application URLs. | |
<!-- END TFDOC -->
## Test
```hcl
module "test" {
source = "./fabric/blueprints/gke/autopilot"
project_create = {
billing_account_id = "12345-12345-12345"
parent = "folders/123456789"
}
project_id = "my-project"
}
# tftest modules=10 resources=30
```

View File

@ -0,0 +1,37 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
# tfdoc:file:description Ansible generated files.
# Renders the variables file loaded by the Ansible playbook (vars/vars.yaml).
resource "local_file" "vars_file" {
  filename = "${path.module}/ansible/vars/vars.yaml"
  # NOTE(review): 0666 leaves the generated file world-writable; confirm
  # whether a tighter mode such as 0644 would be sufficient.
  file_permission = "0666"
  content = yamlencode({
    app_url    = local.urls["app"]
    cluster    = module.cluster.name
    project_id = module.project.project_id
    region     = var.region
  })
}
# Renders the SSH wrapper script from its template into the ansible directory,
# where ansible.cfg references it as `ssh_executable`.
resource "local_file" "gssh_file" {
  filename = "${path.module}/ansible/gssh.sh"
  # NOTE(review): 0777 makes an executed script world-writable; confirm
  # whether 0755 would be sufficient.
  file_permission = "0777"
  content = templatefile(
    "${path.module}/templates/gssh.sh.tpl",
    {
      zone       = local.zone
      project_id = module.project.project_id
    }
  )
}

View File

@ -0,0 +1,8 @@
[defaults]
inventory = inventory/hosts.ini
timeout = 900
[ssh_connection]
pipelining = True
ssh_executable = ./gssh.sh
transfer_method = piped

View File

@ -0,0 +1 @@
mgmt

View File

@ -0,0 +1,128 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- hosts: mgmt
gather_facts: "no"
vars_files:
- vars/vars.yaml
environment:
USE_GKE_GCLOUD_AUTH_PLUGIN: True
tasks:
- name: Download the Google Cloud SDK package repository signing key
get_url:
url: https://packages.cloud.google.com/apt/doc/apt-key.gpg
dest: /usr/share/keyrings/cloud.google.gpg
force: yes
become: true
become_user: root
- name: Add Google Cloud SDK package repository source
apt_repository:
filename: google-cloud-sdk
repo: "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main"
state: present
update_cache: yes
become: true
become_user: root
- name: Install dependencies
apt:
pkg:
- google-cloud-sdk-gke-gcloud-auth-plugin
- kubectl
state: present
become: true
become_user: root
- name: Enable bash completion for kubectl
shell:
cmd: kubectl completion bash > /etc/bash_completion.d/kubectl
creates: /etc/bash_completion.d/kubectl
become: true
become_user: root
- name: Get cluster credentials
shell: >
gcloud container clusters get-credentials {{ cluster }}
--region {{ region }}
--project {{ project_id }}
--internal-ip
- name: Render templates
template:
src: ../bundle/{{ item }}/kustomization.yaml.j2
dest: ../bundle/{{ item }}/kustomization.yaml
delegate_to: localhost
with_items:
- monitoring
- locust
- name: Remove bundle locally
local_action:
module: file
path: ../bundle.tar.gz
state: absent
- name: Archive bundle locally
archive:
path: ../bundle
dest: ../bundle.tar.gz
delegate_to: localhost
- name: Unarchive bundle remotely
unarchive:
src: ../bundle.tar.gz
dest: ~/
- name: Build locust image
shell: >
gcloud builds submit --tag {{ region }}-docker.pkg.dev/{{ project_id }}/registry/load-test:latest \
--project {{ project_id }} .
args:
chdir: ~/bundle/locust/image
- name: Enable scraping of kubelet and cAdvisor metrics
shell: >
kubectl patch operatorconfig config
-n gmp-public
--type=merge
-p '{"collection":{"kubeletScraping":{"interval": "30s"}}}'
- name: Deploy monitoring tooling
shell: >
kubectl apply -k .
args:
chdir: ~/bundle/monitoring
- name: Deploy app
shell: >
kubectl apply -k .
args:
chdir: ~/bundle/app
- name: Get forwarding rule name
shell: >
while true; do
forwarding_rule_name=$(kubectl get ingress -n sample -o=jsonpath='{.items[0].metadata.annotations.ingress\.kubernetes\.io\/forwarding-rule}')
if [ -n "$forwarding_rule_name" ]; then
echo $forwarding_rule_name
break
fi
sleep 10
done
register: forwarding_rule_name_output
- name: Set fact forwarding_url_name
set_fact:
forwarding_rule_name: "{{ forwarding_rule_name_output.stdout }}"
- name: Render template (HPA)
template:
src: ../bundle/app/hpa.yaml.j2
dest: ~/bundle/app/hpa.yaml
- name: Apply HPA manifest
shell: >
kubectl apply -f hpa.yaml
args:
chdir: ~/bundle/app
- name: Deploy locust
shell: >
kubectl apply -k .
args:
chdir: ~/bundle/locust

View File

@ -0,0 +1,37 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: nginx
namespace: sample
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: nginx
minReplicas: 1
maxReplicas: 50
metrics:
- type: External
external:
metric:
name: loadbalancing.googleapis.com|https|request_count
selector:
matchLabels:
resource.labels.forwarding_rule_name: {{ forwarding_rule_name }}
target:
type: AverageValue
averageValue: 5

View File

@ -0,0 +1,42 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: cloud.google.com/v1
kind: BackendConfig
metadata:
name: backendconfig
namespace: sample
spec:
healthCheck:
requestPath: /
port: 80
type: HTTP
logging:
enable: true
sampleRate: 0.5
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
annotations:
kubernetes.io/ingress.global-static-ip-name: "app"
kubernetes.io/ingress.allow-http: "true"
name: ingress
namespace: sample
spec:
defaultBackend:
service:
name: nginx
port:
name: web

View File

@ -0,0 +1,18 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
resources:
- namespace.yaml
- nginx.yaml
- ingress.yaml

View File

@ -0,0 +1,18 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: Namespace
metadata:
name: sample

View File

@ -0,0 +1,127 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: ConfigMap
metadata:
name: nginx-config
namespace: sample
data:
nginx.conf: |
events {}
http {
server {
listen 80;
root /var/www/html;
location / {
return 200 'Hello World!';
}
}
server {
listen 8080;
location /stub_status {
stub_status on;
}
}
}
---
# nginx Deployment: serves the sample app on port 80 and exposes stub_status
# on port 8080, with a Prometheus exporter sidecar publishing metrics on 9113.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx
  namespace: sample
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:latest
        ports:
        - containerPort: 80
          name: web
        - containerPort: 8080
          name: status
        volumeMounts:
        # Mount only nginx.conf from the ConfigMap over the image default.
        - name: nginx-config
          mountPath: /etc/nginx/nginx.conf
          subPath: nginx.conf
        # Readiness is probed on the stub_status server (port 8080).
        readinessProbe:
          httpGet:
            path: /stub_status
            port: 8080
          initialDelaySeconds: 2
          periodSeconds: 2
          failureThreshold: 1
        # requests/limits are only valid under the container's `resources`
        # field; without it they are not part of the Container schema.
        resources:
          requests:
            cpu: 10m
            memory: 10Mi
          limits:
            memory: 10Mi
      - name: nginx-prometheus-exporter
        image: nginx/nginx-prometheus-exporter:0.10.0
        ports:
        - containerPort: 9113
          name: metrics
        env:
        # The exporter scrapes the stub_status endpoint of the nginx container.
        - name: SCRAPE_URI
          value: http://localhost:8080/stub_status
        resources:
          requests:
            cpu: 5m
            memory: 5Mi
          limits:
            memory: 5Mi
      volumes:
      - name: nginx-config
        configMap:
          name: nginx-config
---
# Service fronting the nginx pods; it is the default backend of the Ingress
# in the `sample` namespace.
apiVersion: v1
kind: Service
metadata:
  name: nginx
  namespace: sample
  # A single `annotations` key: the original declared it twice.
  annotations:
    cloud.google.com/neg: '{"ingress": true}'
    cloud.google.com/app-protocols: '{"web":"HTTP"}'
    # References the BackendConfig named "backendconfig" in this namespace.
    cloud.google.com/backend-config: '{"default": "backendconfig"}'
  labels:
    app: nginx
spec:
  ports:
  - name: web
    port: 80
    protocol: TCP
  selector:
    app: nginx
---
apiVersion: monitoring.googleapis.com/v1
kind: ClusterPodMonitoring
metadata:
name: nginx
namespace: sample
spec:
selector:
matchLabels:
app: nginx
endpoints:
- port: metrics
interval: 30s

View File

@ -0,0 +1,21 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM locustio/locust:latest
ADD locust-files /home/locust/locust-files
ADD run.sh /home/locust/run.sh
ENTRYPOINT ["/home/locust/run.sh"]

View File

@ -0,0 +1,65 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
from locust import HttpUser, LoadTestShape, task, between
class TestUser(HttpUser):
  """Simulated user that repeatedly requests the root path of the target host.

  Environment variables:
    URL: base URL under test (defaults to the in-cluster nginx service).
    MIN_WAIT_TIME: lower bound of the pause between tasks (default 1).
    MAX_WAIT_TIME: upper bound of the pause between tasks (default 2).
  """
  host = os.getenv("URL", "http://nginx.sample.svc.cluster.local")
  # The fallback must be given to os.getenv(). As the second argument of int()
  # it is interpreted as the numeric base, which raises both when the variable
  # is unset (int(None, 1)) and when it is set (1 is not a valid base).
  wait_time = between(int(os.getenv('MIN_WAIT_TIME', 1)),
                      int(os.getenv('MAX_WAIT_TIME', 2)))

  @task
  def home(self):
    """Request "/" and explicitly mark the response successful on HTTP 200.

    Any other status code is logged.
    """
    with self.client.get("/", catch_response=True) as response:
      if response.status_code == 200:
        response.success()
      else:
        logging.info('Response code is ' + str(response.status_code))
class CustomLoadShape(LoadTestShape):
  """Staircase load profile: users ramp up stage by stage, then back down.

  Configured through environment variables (defaults in parentheses):
  NUM_STAGES (20), STAGE_DURATION (60), SPAWN_RATE (1) and
  NEW_USERS_PER_STAGE (10).
  """
  stages = []
  num_stages = int(os.getenv('NUM_STAGES', 20))
  stage_duration = int(os.getenv('STAGE_DURATION', 60))
  spawn_rate = int(os.getenv('SPAWN_RATE', 1))
  new_users_per_stage = int(os.getenv('NEW_USERS_PER_STAGE', 10))

  # Plain loops are used on purpose: a comprehension body cannot see names
  # defined in the enclosing class body.

  # Ramp-up: stage i ends at i * stage_duration with i * new_users_per_stage.
  for i in range(1, num_stages + 1):
    stages.append(
        dict(duration=stage_duration * i,
             users=new_users_per_stage * i,
             spawn_rate=spawn_rate))

  # Ramp-down: continue the timeline while removing users stage by stage.
  for i in range(1, num_stages):
    stages.append(
        dict(duration=stage_duration * (num_stages + i),
             users=new_users_per_stage * (num_stages - i),
             spawn_rate=spawn_rate))

  def tick(self):
    """Return (users, spawn_rate) for the current stage, or None when done."""
    elapsed = self.get_run_time()
    # First stage whose end time has not been reached yet, if any.
    current = next((s for s in self.stages if elapsed < s['duration']), None)
    if current is None:
      return None
    return (current['users'], current['spawn_rate'])

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build the locust command line according to LOCUST_MODE:
#   standalone (default) - single process
#   master               - adds --master
#   worker               - adds --worker and points at $LOCUST_MASTER
locust_args=(-f /home/locust/locust-files)
LOCUST_MODE=${LOCUST_MODE:-standalone}

case "$LOCUST_MODE" in
  master)
    locust_args+=(--master)
    ;;
  worker)
    locust_args+=(--worker "--master-host=$LOCUST_MASTER")
    ;;
esac

# This script is the image entrypoint: exec replaces the shell with locust so
# that locust receives container signals directly instead of via bash.
exec locust "${locust_args[@]}"

View File

@ -0,0 +1,42 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Load balancer backend settings for the Locust web UI; referenced by name
# from the backend-config annotation of the locust-master-web Service.
apiVersion: cloud.google.com/v1
kind: BackendConfig
metadata:
name: backendconfig
namespace: locust
spec:
# HTTP health check on the root path of port 8089 (the Locust web port).
healthCheck:
requestPath: /
port: 8089
type: HTTP
# Request logging is enabled with a 0.5 sample rate.
logging:
enable: true
sampleRate: 0.5
---
# Ingress exposing the Locust web UI.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: ingress
namespace: locust
annotations:
# Name of the reserved global static IP the load balancer should use.
kubernetes.io/ingress.global-static-ip-name: "locust"
# Plain HTTP is allowed on the load balancer.
kubernetes.io/ingress.allow-http: "true"
spec:
# No host or path rules: all traffic goes to the loc-master-web port of the
# locust-master-web Service.
defaultBackend:
service:
name: locust-master-web
port:
name: loc-master-web

View File

@ -0,0 +1,66 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Kustomization for the Locust load-test bundle.
# The {{ ... }} placeholders are presumably rendered by a templating step
# (Ansible/Jinja) before kustomize reads this file — TODO confirm.
resources:
  - namespace.yaml
  - master.yaml
  - workers.yaml
  - ingress.yaml
patches:
  # Inject the URL of the application under test into the master container.
  - target:
      group: apps
      version: v1
      kind: Deployment
      name: locust-master
      namespace: locust
    patch: |-
      apiVersion: apps/v1
      kind: Deployment
      metadata:
        name: locust-master
        namespace: locust
      spec:
        template:
          spec:
            containers:
              - name: locust-master
                image: load-test-image
                env:
                  - name: URL
                    value: {{ app_url }}
  # Same for the workers. Containers are merged by name, so the name must be
  # the one used in workers.yaml (locust-worker); any other name would add a
  # second container instead of patching the existing one.
  - target:
      group: apps
      version: v1
      kind: Deployment
      name: locust-worker
      namespace: locust
    patch: |-
      apiVersion: apps/v1
      kind: Deployment
      metadata:
        name: locust-worker
        namespace: locust
      spec:
        template:
          spec:
            containers:
              - name: locust-worker
                image: load-test-image
                env:
                  - name: URL
                    value: {{ app_url }}
# Replace the placeholder image name used in the manifests with the image
# stored in the project's Artifact Registry repository.
images:
  - name: load-test-image
    newName: {{ region }}-docker.pkg.dev/{{ project_id }}/registry/load-test
    newTag: latest

View File

@ -0,0 +1,128 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: "apps/v1"
kind: "Deployment"
metadata:
name: locust-master
namespace: locust
labels:
name: locust-master
spec:
replicas: 1
selector:
matchLabels:
app: locust-master
template:
metadata:
labels:
app: locust-master
spec:
tolerations:
- key: group
operator: Equal
value: "locust"
effect: NoSchedule
nodeSelector:
group: "locust"
containers:
- name: locust-master
image: load-test-image
env:
- name: LOCUST_MODE
value: master
ports:
- name: loc-master-web
containerPort: 8089
protocol: TCP
- name: loc-master-p1
containerPort: 5557
protocol: TCP
- name: loc-master-p2
containerPort: 5558
protocol: TCP
resources:
requests:
cpu: 50m
memory: 50Mi
limits:
memory: 50Mi
- name: locust-prometheus-exporter
image: containersol/locust_exporter
ports:
- name: metrics
containerPort: 9646
resources:
requests:
cpu: 5m
memory: 5Mi
limits:
memory: 5Mi
---
kind: Service
apiVersion: v1
metadata:
name: locust-master
namespace: locust
labels:
app: locust-master
spec:
ports:
- port: 5557
targetPort: loc-master-p1
protocol: TCP
name: loc-master-p1
- port: 5558
targetPort: loc-master-p2
protocol: TCP
name: loc-master-p2
- port: 9646
targetPort: metrics
protocol: TCP
name: metrics
selector:
app: locust-master
---
kind: Service
apiVersion: v1
metadata:
name: locust-master-web
namespace: locust
annotations:
cloud.google.com/neg: '{"ingress": true}'
cloud.google.com/app-protocols: '{"loc-master-web":"HTTP"}'
cloud.google.com/backend-config: '{"default": "backendconfig"}'
labels:
app: locust-master
spec:
ports:
- port: 8089
targetPort: loc-master-web
protocol: TCP
name: loc-master-web
selector:
app: locust-master
---
# Managed Prometheus scrape configuration for pods labelled app=locust-master.
# NOTE(review): ClusterPodMonitoring is, as far as I can tell, a cluster-scoped
# kind, so metadata.namespace below likely has no effect and the selector
# applies to every namespace — confirm whether a namespaced PodMonitoring was
# intended.
apiVersion: monitoring.googleapis.com/v1
kind: ClusterPodMonitoring
metadata:
name: locust-master
namespace: locust
spec:
selector:
matchLabels:
app: locust-master
# Scrape the container port named "metrics" every 30 seconds.
endpoints:
- port: metrics
interval: 30s

View File

@ -0,0 +1,18 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: Namespace
metadata:
name: locust

View File

@ -0,0 +1,51 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Locust worker Deployment: five replicas of the load-test image started in
# worker mode, with LOCUST_MASTER pointing at the locust-master Service name.
apiVersion: "apps/v1"
kind: "Deployment"
metadata:
  name: locust-worker
  namespace: locust
  labels:
    name: locust-worker
spec:
  replicas: 5
  selector:
    matchLabels:
      app: locust-worker
  template:
    metadata:
      labels:
        app: locust-worker
    spec:
      # Run only on nodes carrying the group=locust label, tolerating the
      # matching NoSchedule taint.
      tolerations:
        - key: group
          operator: Equal
          value: "locust"
          effect: NoSchedule
      nodeSelector:
        group: "locust"
      containers:
        - name: locust-worker
          image: load-test-image
          env:
            - name: LOCUST_MODE
              value: worker
            - name: LOCUST_MASTER
              value: locust-master
          # requests/limits are container fields only under `resources`.
          resources:
            requests:
              cpu: 20m
              memory: 50Mi
            limits:
              memory: 50Mi

View File

@ -0,0 +1,184 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: ServiceAccount
metadata:
name: custom-metrics-stackdriver-adapter
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: custom-metrics:system:auth-delegator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:auth-delegator
subjects:
- kind: ServiceAccount
name: custom-metrics-stackdriver-adapter
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: custom-metrics-auth-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: custom-metrics-stackdriver-adapter
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: custom-metrics-resource-reader
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: view
subjects:
- kind: ServiceAccount
name: custom-metrics-stackdriver-adapter
namespace: monitoring
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: custom-metrics-stackdriver-adapter
namespace: monitoring
labels:
run: custom-metrics-stackdriver-adapter
k8s-app: custom-metrics-stackdriver-adapter
spec:
replicas: 1
selector:
matchLabels:
run: custom-metrics-stackdriver-adapter
k8s-app: custom-metrics-stackdriver-adapter
template:
metadata:
labels:
run: custom-metrics-stackdriver-adapter
k8s-app: custom-metrics-stackdriver-adapter
kubernetes.io/cluster-service: "true"
spec:
serviceAccountName: custom-metrics-stackdriver-adapter
containers:
- image: gcr.io/gke-release/custom-metrics-stackdriver-adapter:v0.13.1-gke.0
imagePullPolicy: Always
name: pod-custom-metrics-stackdriver-adapter
command:
- /adapter
- --use-new-resource-model=false
resources:
limits:
cpu: 100m
memory: 150Mi
requests:
memory: 150Mi
---
apiVersion: v1
kind: Service
metadata:
labels:
run: custom-metrics-stackdriver-adapter
k8s-app: custom-metrics-stackdriver-adapter
kubernetes.io/cluster-service: 'true'
kubernetes.io/name: Adapter
name: custom-metrics-stackdriver-adapter
namespace: monitoring
spec:
ports:
- port: 443
protocol: TCP
targetPort: 443
selector:
run: custom-metrics-stackdriver-adapter
k8s-app: custom-metrics-stackdriver-adapter
type: ClusterIP
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
name: v1beta1.custom.metrics.k8s.io
spec:
insecureSkipTLSVerify: true
group: custom.metrics.k8s.io
groupPriorityMinimum: 100
versionPriority: 100
service:
name: custom-metrics-stackdriver-adapter
namespace: monitoring
version: v1beta1
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
name: v1beta2.custom.metrics.k8s.io
spec:
insecureSkipTLSVerify: true
group: custom.metrics.k8s.io
groupPriorityMinimum: 100
versionPriority: 200
service:
name: custom-metrics-stackdriver-adapter
namespace: monitoring
version: v1beta2
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
name: v1beta1.external.metrics.k8s.io
spec:
insecureSkipTLSVerify: true
group: external.metrics.k8s.io
groupPriorityMinimum: 100
versionPriority: 100
service:
name: custom-metrics-stackdriver-adapter
namespace: monitoring
version: v1beta1
---
# Read-only access (list, get, watch) to every resource of the
# external.metrics.k8s.io API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: external-metrics-reader
rules:
  - apiGroups:
      - "external.metrics.k8s.io"
    resources:
      - "*"
    verbs:
      - list
      - get
      - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: external-metrics-reader
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: external-metrics-reader
subjects:
- kind: ServiceAccount
name: horizontal-pod-autoscaler
namespace: kube-system

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,79 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: ServiceAccount
metadata:
name: frontend
namespace: monitoring
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: frontend
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: frontend
template:
metadata:
labels:
app: frontend
spec:
serviceAccountName: frontend
tolerations:
- key: group
operator: Equal
value: monitoring
effect: NoSchedule
nodeSelector:
group: monitoring
automountServiceAccountToken: true
containers:
- name: frontend
image: "gke.gcr.io/prometheus-engine/frontend:v0.5.0-gke.0"
args:
- "--web.listen-address=:9090"
ports:
- name: web
containerPort: 9090
resources:
requests:
cpu: 10m
memory: 15Mi
limits:
memory: 15Mi
readinessProbe:
httpGet:
path: /-/ready
port: web
livenessProbe:
httpGet:
path: /-/healthy
port: web
---
apiVersion: v1
kind: Service
metadata:
name: frontend
namespace: monitoring
spec:
clusterIP: None
selector:
app: frontend
ports:
- name: web
port: 9090

View File

@ -0,0 +1,184 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana
namespace: monitoring
data:
allow-snippet-annotations: "false"
grafana.ini: |
[analytics]
check_for_updates = true
[grafana_net]
url = https://grafana.net
[log]
mode = console
[paths]
data = /var/lib/grafana/
logs = /var/log/grafana
plugins = /var/lib/grafana/plugins
provisioning = /etc/grafana/provisioning
datasources.yaml: |
apiVersion: 1
datasources:
- access: proxy
editable: true
isDefault: true
jsonData:
timeInterval: 5s
name: Prometheus
orgId: 1
type: prometheus
url: http://frontend.monitoring.svc.cluster.local:9090
dashboardproviders.yaml: |
apiVersion: 1
providers:
- disableDeletion: false
folder: k8s
name: k8s
options:
path: /var/lib/grafana/dashboards/k8s
orgId: 1
type: file
- disableDeletion: false
folder: locust
name: locust
options:
path: /var/lib/grafana/dashboards/locust
orgId: 1
type: file
- disableDeletion: false
folder: nginx
name: nginx
options:
path: /var/lib/grafana/dashboards/nginx
orgId: 1
type: file
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: grafana
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: grafana
template:
metadata:
labels:
app: grafana
spec:
tolerations:
- key: group
operator: Equal
value: monitoring
effect: NoSchedule
nodeSelector:
group: monitoring
containers:
- name: grafana
image: grafana/grafana:8.3.4
ports:
- name: web
containerPort: 3000
env:
- name: GF_PATHS_DATA
value: /var/lib/grafana/
- name: GF_PATHS_LOGS
value: /var/log/grafana
- name: GF_PATHS_PLUGINS
value: /var/lib/grafana/plugins
- name: GF_PATHS_PROVISIONING
value: /etc/grafana/provisioning
- name: "GF_AUTH_ANONYMOUS_ENABLED"
value: "true"
- name: "GF_AUTH_ANONYMOUS_ORG_ROLE"
value: "Admin"
- name: "GF_AUTH_BASIC_ENABLED"
value: "false"
- name: "GF_SECURITY_ADMIN_PASSWORD"
value: "-"
- name: "GF_SECURITY_ADMIN_USER"
value: "-"
volumeMounts:
- name: config
mountPath: "/etc/grafana/grafana.ini"
subPath: grafana.ini
- name: storage
mountPath: "/var/lib/grafana"
- name: k8s-grafana-dashboards
mountPath: "/var/lib/grafana/dashboards/k8s"
- name: locust-grafana-dashboards
mountPath: "/var/lib/grafana/dashboards/locust"
- name: nginx-grafana-dashboards
mountPath: "/var/lib/grafana/dashboards/nginx"
- name: config
mountPath: "/etc/grafana/provisioning/datasources/datasources.yaml"
subPath: "datasources.yaml"
- name: config
mountPath: "/etc/grafana/provisioning/dashboards/dashboardproviders.yaml"
subPath: "dashboardproviders.yaml"
resources:
requests:
cpu: 30m
memory: 100Mi
limits:
memory: 100Mi
livenessProbe:
failureThreshold: 10
httpGet:
path: /api/health
port: 3000
initialDelaySeconds: 60
timeoutSeconds: 30
readinessProbe:
httpGet:
path: /api/health
port: 3000
volumes:
- name: config
configMap:
name: grafana
- name: k8s-grafana-dashboards
configMap:
name: k8s-grafana-dashboards
- name: locust-grafana-dashboards
configMap:
name: locust-grafana-dashboards
- name: nginx-grafana-dashboards
configMap:
name: nginx-grafana-dashboards
- name: storage
emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
name: grafana
namespace: monitoring
annotations:
cloud.google.com/neg: '{"ingress": true}'
cloud.google.com/app-protocols: '{"web":"HTTP"}'
cloud.google.com/backend-config: '{"default": "backendconfig"}'
spec:
clusterIP: None
selector:
app: grafana
ports:
- name: web
port: 3000

View File

@ -0,0 +1,43 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: cloud.google.com/v1
kind: BackendConfig
metadata:
name: backendconfig
namespace: monitoring
spec:
healthCheck:
requestPath: /api/health
port: 3000
type: HTTP
logging:
enable: true
sampleRate: 0.5
---
# Ingress exposing Grafana through the reserved "grafana" global static IP.
# NOTE(review): plain HTTP is allowed here, and the grafana Deployment enables
# anonymous access with the Admin role, so anyone able to reach this address
# presumably gets admin rights on Grafana — confirm this is acceptable outside
# of a throwaway test environment.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: ingress
namespace: monitoring
annotations:
kubernetes.io/ingress.global-static-ip-name: "grafana"
kubernetes.io/ingress.allow-http: "true"
spec:
# No host or path rules: all traffic goes to the "web" port of the grafana
# Service.
defaultBackend:
service:
name: grafana
port:
name: web

View File

@ -0,0 +1,342 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: apps/v1
kind: StatefulSet
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.3.0
namespace: gmp-public
name: kube-state-metrics
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: kube-state-metrics
serviceName: kube-state-metrics
template:
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.3.0
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values:
- arm64
- amd64
- key: kubernetes.io/os
operator: In
values:
- linux
containers:
- name: kube-state-metric
image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.3.0
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
args:
- --pod=$(POD_NAME)
- --pod-namespace=$(POD_NAMESPACE)
- --port=8080
- --telemetry-port=8081
ports:
- name: metrics
containerPort: 8080
- name: metrics-self
containerPort: 8081
resources:
requests:
cpu: 10m
memory: 50Mi
limits:
memory: 50Mi
securityContext:
allowPrivilegeEscalation: false
privileged: false
capabilities:
drop:
- all
runAsUser: 1000
runAsGroup: 1000
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 5
timeoutSeconds: 5
readinessProbe:
httpGet:
path: /
port: 8081
initialDelaySeconds: 5
timeoutSeconds: 5
serviceAccountName: kube-state-metrics
---
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.3.0
namespace: gmp-public
name: kube-state-metrics
spec:
clusterIP: None
ports:
- name: metrics
port: 8080
targetPort: metrics
- name: metrics-self
port: 8081
targetPort: metrics-self
selector:
app.kubernetes.io/name: kube-state-metrics
---
apiVersion: v1
kind: ServiceAccount
metadata:
namespace: gmp-public
name: kube-state-metrics
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.3.0
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: gmp-public:kube-state-metrics
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.3.0
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gmp-public:kube-state-metrics
subjects:
- kind: ServiceAccount
namespace: gmp-public
name: kube-state-metrics
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: gmp-public:kube-state-metrics
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.3.0
rules:
- apiGroups:
- ""
resources:
- configmaps
- secrets
- nodes
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
- persistentvolumes
- namespaces
- endpoints
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- extensions
resources:
- daemonsets
- deployments
- replicasets
- ingresses
verbs:
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
- daemonsets
- deployments
- replicasets
verbs:
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
verbs:
- get
- apiGroups:
- batch
resources:
- cronjobs
- jobs
verbs:
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs:
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
- apiGroups:
- policy
resources:
- poddisruptionbudgets
verbs:
- list
- watch
- apiGroups:
- certificates.k8s.io
resources:
- certificatesigningrequests
verbs:
- list
- watch
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
- volumeattachments
verbs:
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
- mutatingwebhookconfigurations
- validatingwebhookconfigurations
verbs:
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- networkpolicies
- ingresses
verbs:
- list
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- list
- watch
---
# Uses the autoscaling/v2 HorizontalPodAutoscaler API, which is generally
# available from Kubernetes 1.23.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: kube-state-metrics
namespace: gmp-public
spec:
maxReplicas: 10
minReplicas: 1
scaleTargetRef:
apiVersion: apps/v1
kind: StatefulSet
name: kube-state-metrics
metrics:
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 60
behavior:
scaleDown:
policies:
- type: Pods
value: 1
# Under-utilization needs to persist for `periodSeconds` before any action can be taken.
# Current supported max from https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/horizontal-pod-autoscaler-v2beta2/.
periodSeconds: 1800
# Current supported max from https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/horizontal-pod-autoscaler-v2beta2/.
stabilizationWindowSeconds: 3600
---
apiVersion: monitoring.googleapis.com/v1
kind: ClusterPodMonitoring
metadata:
name: kube-state-metrics
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: google-cloud-managed-prometheus
spec:
selector:
matchLabels:
app.kubernetes.io/name: kube-state-metrics
endpoints:
- port: metrics
interval: 30s
metricRelabeling:
- action: keep
regex: kube_(daemonset|deployment|pod|namespace|node|statefulset)_.+
sourceLabels: [__name__]
targetLabels:
metadata: [] # explicitly empty so the metric labels are respected
---
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
namespace: gmp-public
name: kube-state-metrics
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: google-cloud-managed-prometheus
spec:
selector:
matchLabels:
app.kubernetes.io/name: kube-state-metrics
endpoints:
- port: metrics-self
interval: 30s

View File

@ -0,0 +1,72 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
resources:
- namespace.yaml
- frontend.yaml
- grafana.yaml
- ingress.yaml
- custom-stackdriver-metrics-adapter.yaml
- kube-state-metrics.yaml
configMapGenerator:
- name: k8s-grafana-dashboards
namespace: monitoring
options:
disableNameSuffixHash: true
files:
- dashboards/k8s-global.json
- dashboards/k8s-namespaces.json
- dashboards/k8s-nodes.json
- dashboards/k8s-pods.json
- name: locust-grafana-dashboards
namespace: monitoring
options:
disableNameSuffixHash: true
files:
- dashboards/locust.json
- name: nginx-grafana-dashboards
namespace: monitoring
options:
disableNameSuffixHash: true
files:
- dashboards/nginx.json
patches:
- target:
version: v1
kind: ServiceAccount
name: frontend
namespace: monitoring
patch: |-
- op: add
path: /metadata/annotations/iam.gke.io~1gcp-service-account
value: sa-monitoring@{{ project_id }}.iam.gserviceaccount.com
- target:
version: v1
kind: ServiceAccount
name: custom-metrics-stackdriver-adapter
namespace: monitoring
patch: |-
- op: add
path: /metadata/annotations/iam.gke.io~1gcp-service-account
value: sa-monitoring@{{ project_id }}.iam.gserviceaccount.com
- target:
group: apps
version: v1
kind: Deployment
name: frontend
namespace: monitoring
patch: |-
- op: add
path: /spec/template/spec/containers/0/args/-
value: "--query.project-id={{ project_id }}"

View File

@ -0,0 +1,18 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: Namespace
metadata:
name: monitoring

View File

@ -0,0 +1,54 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
# GKE Autopilot cluster attached to the "subnet-cluster" subnet of the
# blueprint VPC, with managed Prometheus enabled.
module "cluster" {
  source     = "../../../modules/gke-cluster"
  project_id = module.project.project_id
  name       = "cluster"
  location   = var.region
  vpc_config = {
    network    = module.vpc.self_link
    subnetwork = module.vpc.subnet_self_links["${var.region}/subnet-cluster"]
    # Names of the subnet secondary ranges used for pods and services.
    secondary_range_names = {
      pods     = "pods"
      services = "services"
    }
    master_authorized_ranges = var.cluster_network_config.master_authorized_cidr_blocks
    master_ipv4_cidr_block   = var.cluster_network_config.master_cidr_block
  }
  enable_features = {
    autopilot = true
  }
  monitoring_config = {
    # NOTE(review): attribute name assumed to be `enable_components` in the
    # gke-cluster module's monitoring_config variable — confirm against the
    # module; a misspelled key would presumably be dropped silently.
    enable_components  = ["SYSTEM_COMPONENTS"]
    managed_prometheus = true
  }
  cluster_autoscaling = {
    auto_provisioning_defaults = {
      service_account = module.node_sa.email
    }
  }
  release_channel = "RAPID"
  depends_on = [
    module.project
  ]
}
# Service account referenced by the cluster's auto-provisioning defaults and
# by the project / Artifact Registry IAM bindings.
module "node_sa" {
  source = "../../../modules/iam-service-account"

  name       = "sa-node"
  project_id = module.project.project_id
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

View File

@ -0,0 +1,25 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
locals {
  # Address name => plain-HTTP URL, one entry per reserved global address.
  urls = {
    for name, addr in module.addresses.global_addresses :
    name => "http://${addr.address}"
  }
}
# Global addresses for the three HTTP endpoints: Grafana, Locust and the
# application under test.
module "addresses" {
  source = "../../../modules/net-address"

  project_id = module.project.project_id
  global_addresses = [
    "grafana",
    "locust",
    "app",
  ]
}

View File

@ -0,0 +1,65 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
# Project used by the blueprint. Creation is requested only when
# var.project_create is set; the list of services and the project-level IAM
# bindings are passed to the project module either way.
module "project" {
  source = "../../../modules/project"

  name           = var.project_id
  project_create = var.project_create != null

  # Billing account and parent are taken from var.project_create when it is
  # set, and are null otherwise.
  billing_account = (
    var.project_create == null ? null : var.project_create.billing_account_id
  )
  parent = (
    var.project_create == null ? null : var.project_create.parent
  )

  services = [
    "artifactregistry.googleapis.com",
    "cloudbuild.googleapis.com",
    "container.googleapis.com"
  ]

  iam = {
    # Monitoring and node service accounts.
    "roles/monitoring.viewer"            = [module.monitoring_sa.iam_email]
    "roles/container.nodeServiceAccount" = [module.node_sa.iam_email]
    # Roles granted to the management server's service account.
    "roles/container.admin"          = [module.mgmt_server.service_account_iam_email]
    "roles/storage.admin"            = [module.mgmt_server.service_account_iam_email]
    "roles/cloudbuild.builds.editor" = [module.mgmt_server.service_account_iam_email]
    "roles/viewer"                   = [module.mgmt_server.service_account_iam_email]
  }
}
# Service account for the monitoring workloads. The Kubernetes service
# accounts "frontend" and "custom-metrics-stackdriver-adapter" in the
# "monitoring" namespace are granted roles/iam.workloadIdentityUser on it.
module "monitoring_sa" {
  source = "../../../modules/iam-service-account"

  name       = "sa-monitoring"
  project_id = module.project.project_id
  iam = {
    "roles/iam.workloadIdentityUser" = [
      for ksa in ["frontend", "custom-metrics-stackdriver-adapter"] :
      "serviceAccount:${module.cluster.workload_identity_pool}[monitoring/${ksa}]"
    ]
  }
}
# Docker repository named "registry" in the blueprint region; the node service
# account is granted the Artifact Registry reader role on it.
module "docker_artifact_registry" {
  source = "../../../modules/artifact-registry"

  id         = "registry"
  format     = "DOCKER"
  location   = var.region
  project_id = module.project.project_id
  iam = {
    "roles/artifactregistry.reader" = [module.node_sa.iam_email]
  }
}

View File

@ -0,0 +1,39 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
locals {
  # Zone of the management server: always the "-b" zone of the chosen region.
  zone = format("%s-b", var.region)
}
# Management VM attached to the "subnet-mgmt" subnet of the blueprint VPC,
# with NAT disabled on its interface and a dedicated service account created
# by the compute-vm module. Sizing and image come from var.mgmt_server_config.
module "mgmt_server" {
  source                 = "../../../modules/compute-vm"
  project_id             = module.project.project_id
  name                   = "mgmt"
  zone                   = local.zone
  instance_type          = var.mgmt_server_config.instance_type
  service_account_create = true
  boot_disk = {
    image = var.mgmt_server_config.image
    size  = var.mgmt_server_config.disk_size
    type  = var.mgmt_server_config.disk_type
  }
  network_interfaces = [{
    network    = module.vpc.self_link
    subnetwork = module.vpc.subnet_self_links[format("%s/subnet-mgmt", var.region)]
    addresses  = null
    nat        = false
  }]
}

View File

@ -0,0 +1,20 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
# Exposes local.urls to callers of the blueprint.
output "urls" {
  description = "Grafana, locust and application URLs."
  value       = local.urls
}

View File

@ -0,0 +1,30 @@
#!/bin/bash
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Wrapper that runs a remote command through "gcloud compute ssh" over IAP.
# NOTE(review): the doubled dollar signs and the zone / project_id
# placeholders suggest this file is rendered as a Terraform template before
# being used - confirm against the code that generates gssh.sh.
# Target host: the second-to-last positional argument.
host="$${@: -2: 1}"
# Remote command: the last positional argument.
cmd="$${@: -1: 1}"
# Flags for gcloud; everything after the bare "--" is handed to ssh itself.
gcloud_args="
--tunnel-through-iap
--zone=${zone}
--project=${project_id}
--quiet
--no-user-output-enabled
--
-C
"
# gcloud_args is deliberately left unquoted so that the shell splits it into
# individual arguments; host and cmd stay quoted to remain single words.
exec gcloud compute ssh "$host" $gcloud_args "$cmd"

View File

@ -0,0 +1,84 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
# Address plan for the cluster. nodes_cidr_block is the primary range of the
# "subnet-cluster" subnet; pods_cidr_block and services_cidr_block are its
# secondary ranges.
variable "cluster_network_config" {
  description = "Cluster network configuration."
  type = object({
    master_authorized_cidr_blocks = map(string)
    master_cidr_block             = string
    nodes_cidr_block              = string
    pods_cidr_block               = string
    services_cidr_block           = string
  })
  default = {
    master_authorized_cidr_blocks = {
      internal = "10.0.0.0/8"
    }
    master_cidr_block   = "10.0.0.0/28"
    nodes_cidr_block    = "10.0.1.0/24"
    pods_cidr_block     = "172.16.0.0/20"
    services_cidr_block = "192.168.0.0/24"
  }
}
# Machine type, boot image and boot disk settings of the management VM.
variable "mgmt_server_config" {
  description = "Management server configuration."
  type = object({
    instance_type = string
    image         = string
    disk_type     = string
    disk_size     = number
  })
  default = {
    instance_type = "n1-standard-2"
    image         = "projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts"
    disk_type     = "pd-ssd"
    disk_size     = 50
  }
}
# Primary IP range of the "subnet-mgmt" subnet.
variable "mgmt_subnet_cidr_block" {
  description = "Management subnet IP CIDR range."
  type        = string
  default     = "10.0.2.0/24"
}
# Name passed to the net-vpc module; null is rejected (nullable = false).
variable "network" {
  description = "VPC name."
  type        = string
  nullable    = false
  default     = "vpc"
}
# When left null (the default) no project is created; the VPC module's
# vpc_create flag is derived from the same null check.
variable "project_create" {
  description = "Parameters for the creation of the new project."
  type = object({
    parent             = string
    billing_account_id = string
  })
  default = null
}
# Required: no default is provided.
variable "project_id" {
  description = "Project ID."
  type        = string
}
# Region used for the subnets, the Cloud NAT and the artifact registry; the
# management server zone is derived from it.
variable "region" {
  description = "Region."
  type        = string
  default     = "europe-west1"
}

View File

@ -0,0 +1,46 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
# Blueprint VPC with two subnets in var.region: one for the management server
# and one for the cluster, the latter carrying the pods and services
# secondary ranges.
module "vpc" {
  source     = "../../../modules/net-vpc"
  project_id = module.project.project_id
  name       = var.network
  # the VPC is created only when the project is created as well
  vpc_create = var.project_create != null
  subnets = [
    {
      name          = "subnet-mgmt"
      region        = var.region
      ip_cidr_range = var.mgmt_subnet_cidr_block
    },
    {
      name          = "subnet-cluster"
      region        = var.region
      ip_cidr_range = var.cluster_network_config.nodes_cidr_block
      secondary_ip_ranges = {
        pods     = var.cluster_network_config.pods_cidr_block
        services = var.cluster_network_config.services_cidr_block
      }
    }
  ]
}
# Cloud NAT named "nat" in var.region, with its router on the blueprint VPC.
module "nat" {
  source         = "../../../modules/net-cloudnat"
  name           = "nat"
  project_id     = module.project.project_id
  region         = var.region
  router_network = module.vpc.name
}

View File

@ -1,5 +1,5 @@
/**
* Copyright 2022 Google LLC
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -20,6 +20,7 @@ locals {
# organization authoritative IAM bindings, in an easy to edit format before
# they are combined with var.iam a bit further in locals
_iam = {
"roles/billing.creator" = []
"roles/browser" = [
"domain:${var.organization.domain}"
]

View File

@ -11,3 +11,15 @@
- **How can I fix permission issues when running Terraform apply?**
- Make sure your account is part of the organization admin group defined in variables.
- Make sure you have configured [application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials), rerun `gcloud auth login --update-adc` to fix them.
- **My GCP organization is not empty. What is the best way to preserve existing work and still install FAST?**
- Background: FAST needs to be installed at the organization level, because several of the things it manages can only be configured there, such as the org policy role, secure tags and org policies.
- Create a folder, you can call it "Legacy"
- Move all the existing projects and folders into this folder (you can do it by selecting all of them at once on the [resource management page](https://console.cloud.google.com/cloud-resource-manager) of the GCP console)
- Collect the existing defined org policies and save them:
```
gcloud organizations list
export FAST_ORG_ID=123456
for c in $(gcloud org-policies list --organization $FAST_ORG_ID --format='get(constraint)'); do gcloud org-policies describe --organization $FAST_ORG_ID $c ; echo '---' ; done > previous_policies.yaml
```
- Review the saved policies alongside [the ones FAST applies](1-resman/data/org-policies), and apply those that still make sense at the "Legacy" folder level
- Proceed with installing Fast normally

View File

@ -62,16 +62,88 @@ module "vm" {
}
# tftest modules=1 resources=1
```
### Example with advanced routing capabilities
Below is a sample Terraform example that bootstraps a simple NVA powered by [COS](https://cloud.google.com/container-optimized-os/docs) and running an [FRRouting](https://frrouting.org/) container.
The following sample frr.conf file, based on the documentation available [here](https://docs.frrouting.org/en/latest/basic.html), configures a BGP service with ASN 65001 on the FRR container, which establishes a BGP session with a remote neighbor at IP address 10.128.0.2 with ASN 65002.
```
# tftest-file id=frr_conf path=./frr.conf
# Example frr.conf file
log syslog informational
no ipv6 forwarding
router bgp 65001
neighbor 10.128.0.2 remote-as 65002
line vty
```
The following code assumes that a file named frr.conf exists in the same folder.
```hcl
locals {
network_interfaces = [
{
addresses = null
name = "dev"
nat = false
network = "dev_vpc_self_link"
routes = ["10.128.0.0/9"]
subnetwork = "dev_vpc_nva_subnet_self_link"
enable_masquerading = true
non_masq_cidrs = ["10.0.0.0/8"]
},
{
addresses = null
name = "prod"
nat = false
network = "prod_vpc_self_link"
routes = ["10.0.0.0/9"]
subnetwork = "prod_vpc_nva_subnet_self_link"
}
]
}
module "cos-nva" {
source = "./fabric/modules/cloud-config-container/simple-nva"
enable_health_checks = true
network_interfaces = local.network_interfaces
frr_config = { config_file = "./frr.conf", daemons_enabled = ["bgpd"] }
optional_run_cmds = ["ls -l"]
}
module "vm" {
source = "./fabric/modules/compute-vm"
project_id = "my-project"
zone = "europe-west8-b"
name = "cos-nva"
network_interfaces = local.network_interfaces
metadata = {
user-data = module.cos-nva.cloud_config
google-logging-enabled = true
}
boot_disk = {
image = "projects/cos-cloud/global/images/family/cos-stable"
type = "pd-ssd"
size = 10
}
tags = ["nva", "ssh"]
}
# tftest modules=1 resources=1 files=frr_conf
```
<!-- BEGIN TFDOC -->
## Variables
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| [network_interfaces](variables.tf#L39) | Network interfaces configuration. | <code title="list&#40;object&#40;&#123;&#10; routes &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;&#41;">list&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> | ✓ | |
| [network_interfaces](variables.tf#L75) | Network interfaces configuration. | <code title="list&#40;object&#40;&#123;&#10; routes &#61; optional&#40;list&#40;string&#41;&#41;&#10; enable_masquerading &#61; optional&#40;bool, false&#41;&#10; non_masq_cidrs &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;&#41;">list&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> | ✓ | |
| [cloud_config](variables.tf#L17) | Cloud config template path. If null default will be used. | <code>string</code> | | <code>null</code> |
| [enable_health_checks](variables.tf#L23) | Configures routing to enable responses to health check probes. | <code>bool</code> | | <code>false</code> |
| [files](variables.tf#L29) | Map of extra files to create on the instance, path as key. Owner and permissions will use defaults if null. | <code title="map&#40;object&#40;&#123;&#10; content &#61; string&#10; owner &#61; string&#10; permissions &#61; string&#10;&#125;&#41;&#41;">map&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> | | <code>&#123;&#125;</code> |
| [frr_config](variables.tf#L39) | FRR configuration for container running on the NVA. | <code title="object&#40;&#123;&#10; daemons_enabled &#61; optional&#40;list&#40;string&#41;&#41;&#10; config_file &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> |
| [optional_run_cmds](variables.tf#L84) | Optional Cloud Init run commands to execute. | <code>list&#40;string&#41;</code> | | <code>&#91;&#93;</code> |
## Outputs

View File

@ -1,6 +1,6 @@
#cloud-config
# Copyright 2022 Google LLC
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -22,6 +22,7 @@ write_files:
content: |
${indent(6, data.content)}
%{ endfor }
- path: /etc/systemd/system/routing.service
permissions: 0644
owner: root
@ -34,6 +35,7 @@ write_files:
Wants=network-online.target
[Service]
ExecStart=/bin/sh -c "/var/run/nva/start-routing.sh"
- path: /var/run/nva/start-routing.sh
permissions: 0744
owner: root
@ -43,6 +45,12 @@ write_files:
%{ if enable_health_checks ~}
/var/run/nva/policy_based_routing.sh ${interface.name}
%{ endif ~}
%{ if interface.enable_masquerading ~}
%{ for cidr in interface.non_masq_cidrs ~}
iptables -t nat -A POSTROUTING -o ${interface.name} -d ${cidr} -j ACCEPT
%{ endfor ~}
iptables -t nat -A POSTROUTING -o ${interface.name} -j MASQUERADE
%{ endif ~}
%{ for route in interface.routes ~}
ip route add ${route} via `curl http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/${interface.number}/gateway -H "Metadata-Flavor:Google"` dev ${interface.name}
%{ endfor ~}
@ -55,4 +63,6 @@ runcmd:
- systemctl daemon-reload
- systemctl enable routing
- systemctl start routing
%{ for cmd in optional_run_cmds ~}
- ${cmd}
%{ endfor ~}

View File

@ -0,0 +1,65 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Per-daemon on/off switches. Each *_enabled placeholder below is filled in
# when the module renders this file as a template, with "yes" for daemons
# listed in frr_config.daemons_enabled and "no" for all the others.
zebra=${zebra_enabled}
bgpd=${bgpd_enabled}
ospfd=${ospfd_enabled}
ospf6d=${ospf6d_enabled}
ripd=${ripd_enabled}
ripngd=${ripngd_enabled}
isisd=${isisd_enabled}
pimd=${pimd_enabled}
ldpd=${ldpd_enabled}
nhrpd=${nhrpd_enabled}
eigrpd=${eigrpd_enabled}
babeld=${babeld_enabled}
sharpd=${sharpd_enabled}
staticd=${staticd_enabled}
pbrd=${pbrd_enabled}
bfdd=${bfdd_enabled}
fabricd=${fabricd_enabled}
# If this option is set the /etc/init.d/frr script automatically loads
# the config via "vtysh -b" when the servers are started.
# Check /etc/pam.d/frr if you intend to use "vtysh"!
vtysh_enable=yes
zebra_options=" -A 127.0.0.1 -s 90000000"
bgpd_options=" -A 127.0.0.1"
ospfd_options=" --daemon -A 127.0.0.1"
ospf6d_options=" --daemon -A ::1"
ripd_options=" --daemon -A 127.0.0.1"
ripngd_options=" --daemon -A ::1"
isisd_options=" --daemon -A 127.0.0.1"
pimd_options=" --daemon -A 127.0.0.1"
ldpd_options=" --daemon -A 127.0.0.1"
nhrpd_options=" --daemon -A 127.0.0.1"
eigrpd_options=" --daemon -A 127.0.0.1"
babeld_options=" --daemon -A 127.0.0.1"
sharpd_options=" --daemon -A 127.0.0.1"
staticd_options=" --daemon -A 127.0.0.1"
pbrd_options=" --daemon -A 127.0.0.1"
bfdd_options=" --daemon -A 127.0.0.1"
fabricd_options=" --daemon -A 127.0.0.1"
#MAX_FDS=1024
# The list of daemons to watch is automatically generated by the init script.
#watchfrr_options=""
# for debugging purposes, you can specify a "wrap" command to start instead
# of starting the daemon directly, e.g. to use valgrind on ospfd:
# ospfd_wrap="/usr/bin/valgrind"
# or you can use "all_wrap" for all daemons, e.g. to use perf record:
# all_wrap="/usr/bin/perf record --call-graph -"
# the normal daemon command is added to this at the end.

View File

@ -0,0 +1,27 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Runs the FRR routing suite as a privileged, host-networked Docker container
# that reads its configuration from /etc/frr on the host.
[Unit]
Description=Start FRR container
After=gcr-online.target docker.socket
Wants=gcr-online.target docker.socket docker-events-collector.service
[Service]
Environment="HOME=/home/frr"
ExecStart=/usr/bin/docker run --rm --name=frr \
  --privileged \
  --network host \
  -v /etc/frr:/etc/frr \
  frrouting/frr
ExecStop=/usr/bin/docker stop frr
# The container is started with --rm, so it has normally been removed already
# by the time this runs; the "-" prefix keeps the resulting "no such
# container" error from marking the unit as failed.
ExecStopPost=-/usr/bin/docker rm frr

View File

@ -1,6 +1,6 @@
#!/bin/bash
# Copyright 2022 Google LLC
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

View File

@ -1,6 +1,6 @@
#!/bin/bash
# Copyright 2022 Google LLC
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

View File

@ -1,5 +1,5 @@
/**
* Copyright 2022 Google LLC
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -15,42 +15,109 @@
*/
locals {
cloud_config = templatefile(local.template, merge({
files = local.files
enable_health_checks = var.enable_health_checks
network_interfaces = local.network_interfaces
}))
_files = merge(
{
"/var/run/nva/ipprefix_by_netmask.sh" = {
content = file("${path.module}/files/ipprefix_by_netmask.sh")
owner = "root"
permissions = "0744"
}
"/var/run/nva/policy_based_routing.sh" = {
content = file("${path.module}/files/policy_based_routing.sh")
owner = "root"
permissions = "0744"
}
}, {
for path, attrs in var.files : path => {
content = attrs.content,
owner = attrs.owner,
permissions = attrs.permissions
}
},
try(var.frr_config != null, false) ? {
"/etc/frr/daemons" = {
content = templatefile("${path.module}/files/frr/daemons", local._frr_daemons_enabled)
owner = "root"
permissions = "0744"
}
"/etc/frr/frr.conf" = {
content = file(var.frr_config.config_file)
owner = "root"
permissions = "0744"
}
"/etc/systemd/system/frr.service" = {
content = file("${path.module}/files/frr/frr.service")
owner = "root"
permissions = "0644"
}
"/var/lib/docker/daemon.json" = {
content = <<EOF
{
"live-restore": true,
"storage-driver": "overlay2",
"log-opts": {
"max-size": "1024m"
}
}
EOF
owner = "root"
permissions = "0644"
}
} : {}
)
files = merge({
"/var/run/nva/ipprefix_by_netmask.sh" = {
content = file("${path.module}/files/ipprefix_by_netmask.sh")
owner = "root"
permissions = "0744"
}
"/var/run/nva/policy_based_routing.sh" = {
content = file("${path.module}/files/policy_based_routing.sh")
owner = "root"
permissions = "0744"
}
}, {
for path, attrs in var.files : path => {
content = attrs.content,
owner = attrs.owner,
permissions = attrs.permissions
}
})
_frr_daemons = [
"zebra",
"bgpd",
"ospfd",
"ospf6d",
"ripd",
"ripngd",
"isisd",
"pimd",
"ldpd",
"nhrpd",
"eigrpd",
"babeld",
"sharpd",
"staticd",
"pbrd",
"bfdd",
"fabricd"
]
network_interfaces = [
# Template variables for files/frr/daemons: one "<daemon>_enabled" key per
# known daemon, set to "yes" when the daemon is listed in
# var.frr_config.daemons_enabled and to "no" otherwise.
# daemons_enabled is optional, so a null list is treated as empty; without
# this, contains() fails, try() falls back to the empty map, and rendering the
# daemons template then fails on its missing variables.
# The outer try() still yields {} when var.frr_config itself is null.
_frr_daemons_enabled = try(
  {
    for daemon in local._frr_daemons :
    "${daemon}_enabled" => contains(
      var.frr_config.daemons_enabled == null ? [] : var.frr_config.daemons_enabled,
      daemon
    ) ? "yes" : "no"
  }, {})
_network_interfaces = [
for index, interface in var.network_interfaces : {
name = "eth${index}"
number = index
routes = interface.routes
name = "eth${index}"
number = index
routes = interface.routes
enable_masquerading = interface.enable_masquerading != null ? interface.enable_masquerading : false
non_masq_cidrs = interface.non_masq_cidrs != null ? interface.non_masq_cidrs : []
}
]
template = (
_optional_run_cmds = (
try(var.frr_config != null, false)
? concat(["systemctl start frr"], var.optional_run_cmds)
: var.optional_run_cmds
)
_template = (
var.cloud_config == null
? "${path.module}/cloud-config.yaml"
: var.cloud_config
)
cloud_config = templatefile(local._template, {
enable_health_checks = var.enable_health_checks
files = local._files
network_interfaces = local._network_interfaces
optional_run_cmds = local._optional_run_cmds
})
}

View File

@ -1,5 +1,5 @@
/**
* Copyright 2022 Google LLC
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.

View File

@ -1,5 +1,5 @@
/**
* Copyright 2022 Google LLC
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -36,9 +36,53 @@ variable "files" {
default = {}
}
# Optional FRR setup for the NVA; null (the default) leaves FRR out.
# - daemons_enabled: optional list of FRR daemon names to switch on
# - config_file: path of the frr.conf file, read by the module with file()
variable "frr_config" {
description = "FRR configuration for container running on the NVA."
type = object({
daemons_enabled = optional(list(string))
config_file = string
})
default = null
validation {
# Every entry of daemons_enabled must be one of the daemon names below.
# The surrounding try(..., true) makes the check pass when frr_config or
# daemons_enabled is null, since iterating over them raises an error.
condition = try(alltrue([
for daemon in var.frr_config.daemons_enabled : contains([
"zebra",
"bgpd",
"ospfd",
"ospf6d",
"ripd",
"ripngd",
"isisd",
"pimd",
"ldpd",
"nhrpd",
"eigrpd",
"babeld",
"sharpd",
"staticd",
"pbrd",
"bfdd",
"fabricd"
], daemon)
]), true)
error_message = <<EOF
Invalid entry specified in daemons_enabled list, must be one of [zebra, bgpd, ospfd, ospf6d,
ripd, ripngd, isisd, pimd, ldpd, nhrpd, eigrpd, babeld, sharpd, staticd, pbrd, bfdd, fabricd]
EOF
}
}
variable "network_interfaces" {
description = "Network interfaces configuration."
type = list(object({
routes = optional(list(string))
routes = optional(list(string))
enable_masquerading = optional(bool, false)
non_masq_cidrs = optional(list(string))
}))
}
# Extra commands rendered as additional runcmd entries of the cloud-config
# template; when frr_config is set the module prepends "systemctl start frr".
variable "optional_run_cmds" {
  description = "Optional Cloud Init run commands to execute."
  type        = list(string)
  default     = []
}

View File

@ -1,4 +1,4 @@
# Copyright 2022 Google LLC
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

View File

@ -83,6 +83,7 @@ locals {
"multiclusteringress.googleapis.com", # grant roles/multiclusteringress.serviceAgent to multicluster-ingress
"pubsub.googleapis.com", # grant roles/pubsub.serviceAgent to pubsub
"meshconfig.googleapis.com", # grant roles/anthosservicemesh.serviceAgent to meshconfig
"notebooks.googleapis.com", # no grants needed
"secretmanager.googleapis.com", # no grants needed
"sqladmin.googleapis.com", # grant roles/cloudsql.serviceAgent to sqladmin (TODO: verify)
]

View File

@ -1,4 +1,4 @@
# Copyright 2022 Google LLC
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -16,7 +16,7 @@ counts:
google_bigquery_dataset: 1
google_bigquery_default_service_account: 3
google_logging_organization_sink: 2
google_organization_iam_binding: 19
google_organization_iam_binding: 20
google_organization_iam_custom_role: 3
google_organization_iam_member: 16
google_project: 3