Merge branch 'master' into sql-multi-region

This commit is contained in:
lcaggio 2022-04-13 16:06:20 +02:00 committed by GitHub
commit ac213c77f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 160 additions and 55 deletions

View File

@ -18,13 +18,14 @@ from code import interact
import os
from pickletools import int4
import time
import http
import yaml
from collections import defaultdict
from google.api import metric_pb2 as ga_metric
from google.api_core import protobuf_helpers
from google.api_core import exceptions, protobuf_helpers
from google.cloud import monitoring_v3, asset_v1
from google.protobuf import field_mask_pb2
from googleapiclient import discovery
from googleapiclient import discovery, errors
# Organization ID containing the projects to be monitored
ORGANIZATION_ID = os.environ.get("ORGANIZATION_ID")
@ -66,6 +67,9 @@ def main(event, context):
get_l4_forwarding_rules_data(
metrics_dict, l4_forwarding_rules_dict,
limits_dict['internal_forwarding_rules_l4_limit'])
get_l7_forwarding_rules_data(
metrics_dict, l7_forwarding_rules_dict,
limits_dict['internal_forwarding_rules_l7_limit'])
get_vpc_peering_data(metrics_dict,
limits_dict['number_of_vpc_peerings_limit'])
dynamic_routes_dict = get_dynamic_routes(
@ -366,6 +370,11 @@ def get_gce_instances_data(metrics_dict, gce_instance_dict, limit_dict):
current_quota_limit = get_quota_current_limit(f"projects/{project}",
metric_instances_limit)
if current_quota_limit is None:
print(
f"Could not write number of instances for projects/{project} due to missing quotas"
)
current_quota_limit_view = customize_quota_view(current_quota_limit)
for net in network_dict:
@ -519,6 +528,12 @@ def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict,
current_quota_limit = get_quota_current_limit(
f"projects/{project}", L4_FORWARDING_RULES_LIMIT_METRIC)
if current_quota_limit is None:
print(
f"Could not write L4 forwarding rules to metric for projects/{project} due to missing quotas"
)
continue
current_quota_limit_view = customize_quota_view(current_quota_limit)
for net in network_dict:
@ -545,6 +560,55 @@ def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict,
f"Wrote number of L4 forwarding rules to metric for projects/{project}")
def get_l7_forwarding_rules_data(metrics_dict, forwarding_rules_dict,
                                 limit_dict):
    '''
    Writes usage, limit and utilization of L7 Internal Forwarding Rules for every VPC network of every project in MONITORED_PROJECTS_LIST.

      Projects for which get_quota_current_limit returns None are skipped after printing a message.

      Parameters:
        metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions.
        forwarding_rules_dict (dictionary of string: int): Keys are the network links and values are the number of Forwarding Rules per network.
        limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value.
      Returns:
        None
    '''
    for project in MONITORED_PROJECTS_LIST:
        networks = get_networks(project)
        quota = get_quota_current_limit(f"projects/{project}",
                                        L7_FORWARDING_RULES_LIMIT_METRIC)

        # No quota data for this project: report it and move on.
        if quota is None:
            print(
                f"Could not write number of L7 forwarding rules to metric for projects/{project} due to missing quotas"
            )
            continue

        quota_view = customize_quota_view(quota)

        for net in networks:
            # set_limits is expected to populate net['limit'], which is read below.
            set_limits(net, quota_view, limit_dict)

            # Networks absent from forwarding_rules_dict count as zero usage.
            usage = forwarding_rules_dict.get(net['self_link'], 0)

            l7_metrics = metrics_dict["metrics_per_network"][
                "l7_forwarding_rules_per_network"]
            network_name = net['network_name']

            write_data_to_metric(project, usage, l7_metrics["usage"]["name"],
                                 network_name)
            write_data_to_metric(project, net['limit'],
                                 l7_metrics["limit"]["name"], network_name)
            # NOTE(review): this division raises ZeroDivisionError if
            # net['limit'] is 0 -- confirm set_limits can never leave it at 0.
            write_data_to_metric(project, usage / net['limit'],
                                 l7_metrics["utilization"]["name"],
                                 network_name)

        print(
            f"Wrote number of L7 forwarding rules to metric for projects/{project}")
def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_dict):
'''
This function gets the usage, limit and utilization per VPC peering group for a specific metric for all projects to be monitored.
@ -564,14 +628,25 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_dict):
# project_id, network_name, network_id, usage, limit, peerings (list of peered networks)
# peerings is a list of dictionary (one for each peered network) and contains:
# project_id, network_name, network_id
current_quota_limit = get_quota_current_limit(f"projects/{project}",
limit_metric)
if current_quota_limit is None:
print(
f"Could not write number of L7 forwarding rules to metric for projects/{project} due to missing quotas"
)
continue
current_quota_limit_view = customize_quota_view(current_quota_limit)
# For each network in this GCP project
for network_dict in network_dict_list:
if network_dict['network_id'] == 0:
print(
f"Could not write {metric_dict['usage']['name']} for peering group {network_dict['network_name']} in {project} due to missing permissions."
)
continue
network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network_dict['network_name']}"
current_quota_limit = get_quota_current_limit(f"projects/{project}",
limit_metric)
current_quota_limit_view = customize_quota_view(current_quota_limit)
limit = get_limit_network(network_dict, network_link,
current_quota_limit_view, limit_dict)
@ -590,9 +665,15 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_dict):
if peered_network_link in usage_dict:
peered_usage = usage_dict[peered_network_link]
peering_project_limit = customize_quota_view(
get_quota_current_limit(
f"projects/{peered_network_dict['project_id']}", limit_metric))
current_peered_quota_limit = get_quota_current_limit(
f"projects/{peered_network_dict['project_id']}", limit_metric)
if current_peered_quota_limit is None:
print(
f"Could not write metrics for peering to projects/{peered_network_dict['project_id']} due to missing quotas"
)
continue
peering_project_limit = customize_quota_view(current_peered_quota_limit)
peered_limit = get_limit_network(peered_network_dict,
peered_network_link,
@ -681,6 +762,11 @@ def count_effective_limit(project_id, network_dict, usage_metric_name,
# Get usage: Sums usage for current network + all peered networks
peering_group_usage = network_dict['usage']
for peered_network in network_dict['peerings']:
if 'usage' not in peered_network:
print(
f"Can not add metrics for peered network in projects/{project_id} as no usage metrics exist due to missing permissions"
)
continue
peering_group_usage += peered_network['usage']
network_link = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network_dict['network_name']}"
@ -694,12 +780,21 @@ def count_effective_limit(project_id, network_dict, usage_metric_name,
for peered_network in network_dict['peerings']:
peered_network_link = f"https://www.googleapis.com/compute/v1/projects/{peered_network['project_id']}/global/networks/{peered_network['network_name']}"
limit_step2.append(
max(peered_network['limit'],
get_limit_ppg(peered_network_link, limit_dict)))
if 'limit' in peered_network:
limit_step2.append(
max(peered_network['limit'],
get_limit_ppg(peered_network_link, limit_dict)))
else:
print(
f"Ignoring projects/{peered_network['project_id']} for limits in peering group of project {project_id} as no limits are available."
+
"This can happen if you don't have permissions on the project, for example if the project is in another organization or a Google managed project"
)
# Calculates effective limit: Step 3: Find minimum from the list created by Step 2
limit_step3 = min(limit_step2)
limit_step3 = 0
if len(limit_step2) > 0:
limit_step3 = min(limit_step2)
# Calculates effective limit: Step 4: Find maximum from step 1 and step 3
effective_limit = max(limit_step1, limit_step3)
@ -837,8 +932,9 @@ def get_routes_for_router(project_id, router_region, router_name):
sum_routes = 0
if 'result' in response:
for peer in response['result']['bgpPeerStatus']:
sum_routes += peer['numLearnedRoutes']
if 'bgpPeerStatus' in response['result']:
for peer in response['result']['bgpPeerStatus']:
sum_routes += peer['numLearnedRoutes']
return sum_routes
@ -939,7 +1035,18 @@ def get_network_id(project_id, network_name):
network_id (int): Network ID.
'''
request = service.networks().list(project=project_id)
response = request.execute()
try:
response = request.execute()
except errors.HttpError as err:
# TODO: log proper warning
if err.resp.status == http.HTTPStatus.FORBIDDEN:
print(
f"Warning: error reading networks for {project_id}. " +
f"This can happen if you don't have permissions on the project, for example if the project is in another organization or a Google managed project"
)
else:
print(f"Warning: error reading networks for {project_id}: {err}")
return 0
network_id = 0
@ -967,15 +1074,22 @@ def get_quota_current_limit(project_link, metric_name):
'''
client, interval = create_client()
results = client.list_time_series(
request={
"name": project_link,
"filter": f'metric.type = "{metric_name}"',
"interval": interval,
"view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL
})
results_list = list(results)
return results_list
try:
results = client.list_time_series(
request={
"name": project_link,
"filter": f'metric.type = "{metric_name}"',
"interval": interval,
"view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL
})
results_list = list(results)
return results_list
except exceptions.PermissionDenied as err:
print(
f"Warning: error reading quotas for {project_link}. " +
f"This can happen if you don't have permissions on the project, for example if the project is in another organization or a Google managed project"
)
return None
def customize_quota_view(quota_results):

View File

@ -15,8 +15,9 @@
*/
locals {
project_id_list = toset(var.monitored_projects_list)
projects = join(",", local.project_id_list)
project_id_list = toset(var.monitored_projects_list)
projects = join(",", local.project_id_list)
monitoring_project = var.monitoring_project_id == "" ? module.project-monitoring[0].project_id : var.monitoring_project_id
}
################################################
@ -24,6 +25,7 @@ locals {
################################################
module "project-monitoring" {
count = var.monitoring_project_id == "" ? 1 : 0
source = "../../../modules/project"
name = "monitoring"
parent = "organizations/${var.organization_id}"
@ -38,7 +40,7 @@ module "project-monitoring" {
module "service-account-function" {
source = "../../../modules/iam-service-account"
project_id = module.project-monitoring.project_id
project_id = local.monitoring_project
name = "sa-dash"
generate_key = false
@ -54,7 +56,7 @@ module "service-account-function" {
}
iam_project_roles = {
"${module.project-monitoring.project_id}" = [
"${local.monitoring_project}" = [
"roles/monitoring.metricWriter"
]
}
@ -66,7 +68,7 @@ module "service-account-function" {
module "pubsub" {
source = "../../../modules/pubsub"
project_id = module.project-monitoring.project_id
project_id = local.monitoring_project
name = "network-dashboard-pubsub"
subscriptions = {
"network-dashboard-pubsub-default" = null
@ -76,7 +78,7 @@ module "pubsub" {
}
resource "google_cloud_scheduler_job" "job" {
project = module.project-monitoring.project_id
project = local.monitoring_project
region = var.region
name = "network-dashboard-scheduler"
schedule = var.schedule_cron
@ -90,9 +92,9 @@ resource "google_cloud_scheduler_job" "job" {
module "cloud-function" {
source = "../../../modules/cloud-function"
project_id = module.project-monitoring.project_id
project_id = local.monitoring_project
name = "network-dashboard-cloud-function"
bucket_name = "network-dashboard-bucket"
bucket_name = "${local.monitoring_project}-network-dashboard-bucket"
bucket_config = {
location = var.region
lifecycle_delete_age = null
@ -114,7 +116,7 @@ module "cloud-function" {
environment_variables = {
MONITORED_PROJECTS_LIST = local.projects
MONITORING_PROJECT_ID = module.project-monitoring.project_id
MONITORING_PROJECT_ID = local.monitoring_project
ORGANIZATION_ID = var.organization_id
}
@ -133,5 +135,5 @@ module "cloud-function" {
resource "google_monitoring_dashboard" "dashboard" {
dashboard_json = file("${path.module}/dashboards/quotas-utilization.json")
project = module.project-monitoring.project_id
project = local.monitoring_project
}

View File

@ -22,8 +22,14 @@ variable "billing_account" {
description = "The ID of the billing account to associate this project with"
}
# ID of the project that hosts the dashboard and the deployed solution.
# Leave as the empty string to have a monitoring project created.
variable "monitoring_project_id" {
  description = "Monitoring project where the dashboard will be created and the solution deployed; a project will be created if set to empty string"
  type        = string
  default     = ""
}
variable "prefix" {
description = "Customer name to use as prefix for resources' naming"
description = "Customer name to use as prefix for monitoring project"
default = ""
}
# TODO: support folder instead of a list of projects?
@ -38,8 +44,9 @@ variable "schedule_cron" {
}
variable "project_monitoring_services" {
description = "Service APIs enabled by default in new projects."
description = "Service APIs enabled in the monitoring project if it will be created."
default = [
"cloudasset.googleapis.com",
"cloudbilling.googleapis.com",
"cloudbuild.googleapis.com",
"cloudresourcemanager.googleapis.com",
@ -50,29 +57,11 @@ variable "project_monitoring_services" {
"iamcredentials.googleapis.com",
"logging.googleapis.com",
"monitoring.googleapis.com",
"oslogin.googleapis.com",
"servicenetworking.googleapis.com",
"serviceusage.googleapis.com",
]
}
variable "project_vm_services" {
description = "Service APIs enabled by default in new projects."
default = [
"cloudbilling.googleapis.com",
"compute.googleapis.com",
"logging.googleapis.com",
"monitoring.googleapis.com",
"servicenetworking.googleapis.com",
]
}
variable "region" {
description = "Region used to deploy subnets"
description = "Region used to deploy the cloud functions and scheduler"
default = "europe-west1"
}
variable "zone" {
description = "Zone used to deploy vms"
default = "europe-west1-b"
}