Merge branch 'master' into sql-multi-region

This commit is contained in:
lcaggio 2022-04-13 16:06:20 +02:00 committed by GitHub
commit ac213c77f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 160 additions and 55 deletions

View File

@ -18,13 +18,14 @@ from code import interact
import os import os
from pickletools import int4 from pickletools import int4
import time import time
import http
import yaml import yaml
from collections import defaultdict from collections import defaultdict
from google.api import metric_pb2 as ga_metric from google.api import metric_pb2 as ga_metric
from google.api_core import protobuf_helpers from google.api_core import exceptions, protobuf_helpers
from google.cloud import monitoring_v3, asset_v1 from google.cloud import monitoring_v3, asset_v1
from google.protobuf import field_mask_pb2 from google.protobuf import field_mask_pb2
from googleapiclient import discovery from googleapiclient import discovery, errors
# Organization ID containing the projects to be monitored # Organization ID containing the projects to be monitored
ORGANIZATION_ID = os.environ.get("ORGANIZATION_ID") ORGANIZATION_ID = os.environ.get("ORGANIZATION_ID")
@ -66,6 +67,9 @@ def main(event, context):
get_l4_forwarding_rules_data( get_l4_forwarding_rules_data(
metrics_dict, l4_forwarding_rules_dict, metrics_dict, l4_forwarding_rules_dict,
limits_dict['internal_forwarding_rules_l4_limit']) limits_dict['internal_forwarding_rules_l4_limit'])
get_l7_forwarding_rules_data(
metrics_dict, l7_forwarding_rules_dict,
limits_dict['internal_forwarding_rules_l7_limit'])
get_vpc_peering_data(metrics_dict, get_vpc_peering_data(metrics_dict,
limits_dict['number_of_vpc_peerings_limit']) limits_dict['number_of_vpc_peerings_limit'])
dynamic_routes_dict = get_dynamic_routes( dynamic_routes_dict = get_dynamic_routes(
@ -366,6 +370,11 @@ def get_gce_instances_data(metrics_dict, gce_instance_dict, limit_dict):
current_quota_limit = get_quota_current_limit(f"projects/{project}", current_quota_limit = get_quota_current_limit(f"projects/{project}",
metric_instances_limit) metric_instances_limit)
if current_quota_limit is None:
print(
f"Could not write number of instances for projects/{project} due to missing quotas"
)
current_quota_limit_view = customize_quota_view(current_quota_limit) current_quota_limit_view = customize_quota_view(current_quota_limit)
for net in network_dict: for net in network_dict:
@ -519,6 +528,12 @@ def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict,
current_quota_limit = get_quota_current_limit( current_quota_limit = get_quota_current_limit(
f"projects/{project}", L4_FORWARDING_RULES_LIMIT_METRIC) f"projects/{project}", L4_FORWARDING_RULES_LIMIT_METRIC)
if current_quota_limit is None:
print(
f"Could not write L4 forwarding rules to metric for projects/{project} due to missing quotas"
)
continue
current_quota_limit_view = customize_quota_view(current_quota_limit) current_quota_limit_view = customize_quota_view(current_quota_limit)
for net in network_dict: for net in network_dict:
@ -545,6 +560,55 @@ def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict,
f"Wrote number of L4 forwarding rules to metric for projects/{project}") f"Wrote number of L4 forwarding rules to metric for projects/{project}")
def get_l7_forwarding_rules_data(metrics_dict, forwarding_rules_dict,
                                 limit_dict):
    '''
    Writes usage, limit and utilization of L7 Internal Forwarding Rules for each VPC network of every monitored project.

    A project is skipped (with a printed message) when get_quota_current_limit returns None for it.

      Parameters:
        metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions.
        forwarding_rules_dict (dictionary of string: int): Keys are the network links and values are the number of Forwarding Rules per network.
        limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value.
      Returns:
        None
    '''
    for project in MONITORED_PROJECTS_LIST:
        networks = get_networks(project)

        quota_limit = get_quota_current_limit(
            f"projects/{project}", L7_FORWARDING_RULES_LIMIT_METRIC)
        if quota_limit is None:
            print(
                f"Could not write number of L7 forwarding rules to metric for projects/{project} due to missing quotas"
            )
            continue

        quota_view = customize_quota_view(quota_limit)

        for net in networks:
            # set_limits is expected to populate net['limit'], which is read below.
            set_limits(net, quota_view, limit_dict)

            # Networks without an entry in forwarding_rules_dict count as zero usage.
            usage = forwarding_rules_dict.get(net['self_link'], 0)

            l7_metrics = metrics_dict["metrics_per_network"][
                "l7_forwarding_rules_per_network"]

            write_data_to_metric(project, usage, l7_metrics["usage"]["name"],
                                 net['network_name'])
            write_data_to_metric(project, net['limit'],
                                 l7_metrics["limit"]["name"],
                                 net['network_name'])
            # Utilization is computed only after usage and limit were written.
            write_data_to_metric(project, usage / net['limit'],
                                 l7_metrics["utilization"]["name"],
                                 net['network_name'])

        print(
            f"Wrote number of L7 forwarding rules to metric for projects/{project}")
def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_dict): def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_dict):
''' '''
This function gets the usage, limit and utilization per VPC peering group for a specific metric for all projects to be monitored. This function gets the usage, limit and utilization per VPC peering group for a specific metric for all projects to be monitored.
@ -564,14 +628,25 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_dict):
# project_id, network_name, network_id, usage, limit, peerings (list of peered networks) # project_id, network_name, network_id, usage, limit, peerings (list of peered networks)
# peerings is a list of dictionary (one for each peered network) and contains: # peerings is a list of dictionary (one for each peered network) and contains:
# project_id, network_name, network_id # project_id, network_name, network_id
current_quota_limit = get_quota_current_limit(f"projects/{project}",
limit_metric)
if current_quota_limit is None:
print(
f"Could not write number of L7 forwarding rules to metric for projects/{project} due to missing quotas"
)
continue
current_quota_limit_view = customize_quota_view(current_quota_limit)
# For each network in this GCP project # For each network in this GCP project
for network_dict in network_dict_list: for network_dict in network_dict_list:
if network_dict['network_id'] == 0:
print(
f"Could not write {metric_dict['usage']['name']} for peering group {network_dict['network_name']} in {project} due to missing permissions."
)
continue
network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network_dict['network_name']}" network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network_dict['network_name']}"
current_quota_limit = get_quota_current_limit(f"projects/{project}",
limit_metric)
current_quota_limit_view = customize_quota_view(current_quota_limit)
limit = get_limit_network(network_dict, network_link, limit = get_limit_network(network_dict, network_link,
current_quota_limit_view, limit_dict) current_quota_limit_view, limit_dict)
@ -590,9 +665,15 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_dict):
if peered_network_link in usage_dict: if peered_network_link in usage_dict:
peered_usage = usage_dict[peered_network_link] peered_usage = usage_dict[peered_network_link]
peering_project_limit = customize_quota_view( current_peered_quota_limit = get_quota_current_limit(
get_quota_current_limit( f"projects/{peered_network_dict['project_id']}", limit_metric)
f"projects/{peered_network_dict['project_id']}", limit_metric)) if current_peered_quota_limit is None:
print(
f"Could not write metrics for peering to projects/{peered_network_dict['project_id']} due to missing quotas"
)
continue
peering_project_limit = customize_quota_view(current_peered_quota_limit)
peered_limit = get_limit_network(peered_network_dict, peered_limit = get_limit_network(peered_network_dict,
peered_network_link, peered_network_link,
@ -681,6 +762,11 @@ def count_effective_limit(project_id, network_dict, usage_metric_name,
# Get usage: Sums usage for current network + all peered networks # Get usage: Sums usage for current network + all peered networks
peering_group_usage = network_dict['usage'] peering_group_usage = network_dict['usage']
for peered_network in network_dict['peerings']: for peered_network in network_dict['peerings']:
if 'usage' not in peered_network:
print(
f"Can not add metrics for peered network in projects/{project_id} as no usage metrics exist due to missing permissions"
)
continue
peering_group_usage += peered_network['usage'] peering_group_usage += peered_network['usage']
network_link = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network_dict['network_name']}" network_link = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network_dict['network_name']}"
@ -694,12 +780,21 @@ def count_effective_limit(project_id, network_dict, usage_metric_name,
for peered_network in network_dict['peerings']: for peered_network in network_dict['peerings']:
peered_network_link = f"https://www.googleapis.com/compute/v1/projects/{peered_network['project_id']}/global/networks/{peered_network['network_name']}" peered_network_link = f"https://www.googleapis.com/compute/v1/projects/{peered_network['project_id']}/global/networks/{peered_network['network_name']}"
limit_step2.append( if 'limit' in peered_network:
max(peered_network['limit'], limit_step2.append(
get_limit_ppg(peered_network_link, limit_dict))) max(peered_network['limit'],
get_limit_ppg(peered_network_link, limit_dict)))
else:
print(
f"Ignoring projects/{peered_network['project_id']} for limits in peering group of project {project_id} as no limits are available."
+
"This can happen if you don't have permissions on the project, for example if the project is in another organization or a Google managed project"
)
# Calculates effective limit: Step 3: Find minimum from the list created by Step 2 # Calculates effective limit: Step 3: Find minimum from the list created by Step 2
limit_step3 = min(limit_step2) limit_step3 = 0
if len(limit_step2) > 0:
limit_step3 = min(limit_step2)
# Calculates effective limit: Step 4: Find maximum from step 1 and step 3 # Calculates effective limit: Step 4: Find maximum from step 1 and step 3
effective_limit = max(limit_step1, limit_step3) effective_limit = max(limit_step1, limit_step3)
@ -837,8 +932,9 @@ def get_routes_for_router(project_id, router_region, router_name):
sum_routes = 0 sum_routes = 0
if 'result' in response: if 'result' in response:
for peer in response['result']['bgpPeerStatus']: if 'bgpPeerStatus' in response['result']:
sum_routes += peer['numLearnedRoutes'] for peer in response['result']['bgpPeerStatus']:
sum_routes += peer['numLearnedRoutes']
return sum_routes return sum_routes
@ -939,7 +1035,18 @@ def get_network_id(project_id, network_name):
network_id (int): Network ID. network_id (int): Network ID.
''' '''
request = service.networks().list(project=project_id) request = service.networks().list(project=project_id)
response = request.execute() try:
response = request.execute()
except errors.HttpError as err:
# TODO: log proper warning
if err.resp.status == http.HTTPStatus.FORBIDDEN:
print(
f"Warning: error reading networks for {project_id}. " +
f"This can happen if you don't have permissions on the project, for example if the project is in another organization or a Google managed project"
)
else:
print(f"Warning: error reading networks for {project_id}: {err}")
return 0
network_id = 0 network_id = 0
@ -967,15 +1074,22 @@ def get_quota_current_limit(project_link, metric_name):
''' '''
client, interval = create_client() client, interval = create_client()
results = client.list_time_series( try:
request={ results = client.list_time_series(
"name": project_link, request={
"filter": f'metric.type = "{metric_name}"', "name": project_link,
"interval": interval, "filter": f'metric.type = "{metric_name}"',
"view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL "interval": interval,
}) "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL
results_list = list(results) })
return results_list results_list = list(results)
return results_list
except exceptions.PermissionDenied as err:
print(
f"Warning: error reading quotas for {project_link}. " +
f"This can happen if you don't have permissions on the project, for example if the project is in another organization or a Google managed project"
)
return None
def customize_quota_view(quota_results): def customize_quota_view(quota_results):

View File

@ -15,8 +15,9 @@
*/ */
locals { locals {
project_id_list = toset(var.monitored_projects_list) project_id_list = toset(var.monitored_projects_list)
projects = join(",", local.project_id_list) projects = join(",", local.project_id_list)
monitoring_project = var.monitoring_project_id == "" ? module.project-monitoring[0].project_id : var.monitoring_project_id
} }
################################################ ################################################
@ -24,6 +25,7 @@ locals {
################################################ ################################################
module "project-monitoring" { module "project-monitoring" {
count = var.monitoring_project_id == "" ? 1 : 0
source = "../../../modules/project" source = "../../../modules/project"
name = "monitoring" name = "monitoring"
parent = "organizations/${var.organization_id}" parent = "organizations/${var.organization_id}"
@ -38,7 +40,7 @@ module "project-monitoring" {
module "service-account-function" { module "service-account-function" {
source = "../../../modules/iam-service-account" source = "../../../modules/iam-service-account"
project_id = module.project-monitoring.project_id project_id = local.monitoring_project
name = "sa-dash" name = "sa-dash"
generate_key = false generate_key = false
@ -54,7 +56,7 @@ module "service-account-function" {
} }
iam_project_roles = { iam_project_roles = {
"${module.project-monitoring.project_id}" = [ "${local.monitoring_project}" = [
"roles/monitoring.metricWriter" "roles/monitoring.metricWriter"
] ]
} }
@ -66,7 +68,7 @@ module "service-account-function" {
module "pubsub" { module "pubsub" {
source = "../../../modules/pubsub" source = "../../../modules/pubsub"
project_id = module.project-monitoring.project_id project_id = local.monitoring_project
name = "network-dashboard-pubsub" name = "network-dashboard-pubsub"
subscriptions = { subscriptions = {
"network-dashboard-pubsub-default" = null "network-dashboard-pubsub-default" = null
@ -76,7 +78,7 @@ module "pubsub" {
} }
resource "google_cloud_scheduler_job" "job" { resource "google_cloud_scheduler_job" "job" {
project = module.project-monitoring.project_id project = local.monitoring_project
region = var.region region = var.region
name = "network-dashboard-scheduler" name = "network-dashboard-scheduler"
schedule = var.schedule_cron schedule = var.schedule_cron
@ -90,9 +92,9 @@ resource "google_cloud_scheduler_job" "job" {
module "cloud-function" { module "cloud-function" {
source = "../../../modules/cloud-function" source = "../../../modules/cloud-function"
project_id = module.project-monitoring.project_id project_id = local.monitoring_project
name = "network-dashboard-cloud-function" name = "network-dashboard-cloud-function"
bucket_name = "network-dashboard-bucket" bucket_name = "${local.monitoring_project}-network-dashboard-bucket"
bucket_config = { bucket_config = {
location = var.region location = var.region
lifecycle_delete_age = null lifecycle_delete_age = null
@ -114,7 +116,7 @@ module "cloud-function" {
environment_variables = { environment_variables = {
MONITORED_PROJECTS_LIST = local.projects MONITORED_PROJECTS_LIST = local.projects
MONITORING_PROJECT_ID = module.project-monitoring.project_id MONITORING_PROJECT_ID = local.monitoring_project
ORGANIZATION_ID = var.organization_id ORGANIZATION_ID = var.organization_id
} }
@ -133,5 +135,5 @@ module "cloud-function" {
resource "google_monitoring_dashboard" "dashboard" { resource "google_monitoring_dashboard" "dashboard" {
dashboard_json = file("${path.module}/dashboards/quotas-utilization.json") dashboard_json = file("${path.module}/dashboards/quotas-utilization.json")
project = module.project-monitoring.project_id project = local.monitoring_project
} }

View File

@ -22,8 +22,14 @@ variable "billing_account" {
description = "The ID of the billing account to associate this project with" description = "The ID of the billing account to associate this project with"
} }
# ID of an existing project to host the monitoring resources. When left as the
# empty string (the default), the "project-monitoring" module is instantiated
# and the ID of the project it creates is used instead.
variable "monitoring_project_id" {
description = "Monitoring project where the dashboard will be created and the solution deployed; a project will be created if set to empty string"
default = ""
}
variable "prefix" { variable "prefix" {
description = "Customer name to use as prefix for resources' naming" description = "Customer name to use as prefix for monitoring project"
default = ""
} }
# TODO: support folder instead of a list of projects? # TODO: support folder instead of a list of projects?
@ -38,8 +44,9 @@ variable "schedule_cron" {
} }
variable "project_monitoring_services" { variable "project_monitoring_services" {
description = "Service APIs enabled by default in new projects." description = "Service APIs enabled in the monitoring project if it will be created."
default = [ default = [
"cloudasset.googleapis.com",
"cloudbilling.googleapis.com", "cloudbilling.googleapis.com",
"cloudbuild.googleapis.com", "cloudbuild.googleapis.com",
"cloudresourcemanager.googleapis.com", "cloudresourcemanager.googleapis.com",
@ -50,29 +57,11 @@ variable "project_monitoring_services" {
"iamcredentials.googleapis.com", "iamcredentials.googleapis.com",
"logging.googleapis.com", "logging.googleapis.com",
"monitoring.googleapis.com", "monitoring.googleapis.com",
"oslogin.googleapis.com",
"servicenetworking.googleapis.com",
"serviceusage.googleapis.com", "serviceusage.googleapis.com",
] ]
} }
variable "project_vm_services" {
description = "Service APIs enabled by default in new projects."
default = [
"cloudbilling.googleapis.com",
"compute.googleapis.com",
"logging.googleapis.com",
"monitoring.googleapis.com",
"servicenetworking.googleapis.com",
]
}
variable "region" { variable "region" {
description = "Region used to deploy subnets" description = "Region used to deploy the cloud functions and scheduler"
default = "europe-west1" default = "europe-west1"
} }
variable "zone" {
description = "Zone used to deploy vms"
default = "europe-west1-b"
}