Refactored how limits are managed; you can now edit the metrics.yaml file to set specific metrics per network.

This commit is contained in:
Aurélien Legrand 2022-03-28 18:44:16 +02:00
parent 302afb6dcd
commit 02dc53d0d4
6 changed files with 184 additions and 248 deletions

View File

@ -43,3 +43,12 @@ The Cloud Function currently tracks usage, limit and utilization of:
- internal forwarding rules for internal L7 load balancers per VPC peering group - internal forwarding rules for internal L7 load balancers per VPC peering group
It writes these values to custom metrics in Cloud Monitoring and creates a dashboard to visualize the current utilization of these metrics in Cloud Monitoring. It writes these values to custom metrics in Cloud Monitoring and creates a dashboard to visualize the current utilization of these metrics in Cloud Monitoring.
## Next steps and ideas
In a future release, we could support:
- Static routes per VPC / per VPC peering group
- Dynamic routes per VPC / per VPC peering group
- Google managed VPCs that are peered with PSA (such as Cloud SQL or Memorystore)
- Subnet IP ranges utilization
If you are interested in this and/or would like to contribute, please contact legranda@google.com.

View File

@ -17,6 +17,7 @@
import os import os
import time import time
import yaml import yaml
from collections import defaultdict
from google.api import metric_pb2 as ga_metric from google.api import metric_pb2 as ga_metric
from google.api_core import protobuf_helpers from google.api_core import protobuf_helpers
from google.cloud import monitoring_v3, asset_v1 from google.cloud import monitoring_v3, asset_v1
@ -32,25 +33,11 @@ MONITORING_PROJECT_ID = os.environ.get("MONITORING_PROJECT_ID")
MONITORING_PROJECT_LINK = f"projects/{MONITORING_PROJECT_ID}" MONITORING_PROJECT_LINK = f"projects/{MONITORING_PROJECT_ID}"
service = discovery.build('compute', 'v1') service = discovery.build('compute', 'v1')
# DEFAULT LIMITS:
LIMIT_INSTANCES = os.environ.get("LIMIT_INSTANCES").split(",")
LIMIT_INSTANCES_PPG = os.environ.get("LIMIT_INSTANCES_PPG").split(",")
LIMIT_L4 = os.environ.get("LIMIT_L4").split(",")
LIMIT_L4_PPG = os.environ.get("LIMIT_L4_PPG").split(",")
LIMIT_L7 = os.environ.get("LIMIT_L7").split(",")
LIMIT_L7_PPG = os.environ.get("LIMIT_L7_PPG").split(",")
LIMIT_SUBNETS = os.environ.get("LIMIT_SUBNETS").split(",")
LIMIT_VPC_PEER = os.environ.get("LIMIT_VPC_PEER").split(",")
# Existing GCP metrics per network # Existing GCP metrics per network
GCE_INSTANCES_LIMIT_METRIC = "compute.googleapis.com/quota/instances_per_vpc_network/limit" GCE_INSTANCES_LIMIT_METRIC = "compute.googleapis.com/quota/instances_per_vpc_network/limit"
GCE_INSTANCES_USAGE_METRIC = "compute.googleapis.com/quota/instances_per_vpc_network/usage"
L4_FORWARDING_RULES_LIMIT_METRIC = "compute.googleapis.com/quota/internal_lb_forwarding_rules_per_vpc_network/limit" L4_FORWARDING_RULES_LIMIT_METRIC = "compute.googleapis.com/quota/internal_lb_forwarding_rules_per_vpc_network/limit"
L4_FORWARDING_RULES_USAGE_METRIC = "compute.googleapis.com/quota/internal_lb_forwarding_rules_per_vpc_network/usage"
L7_FORWARDING_RULES_LIMIT_METRIC = "compute.googleapis.com/quota/internal_managed_forwarding_rules_per_vpc_network/limit" L7_FORWARDING_RULES_LIMIT_METRIC = "compute.googleapis.com/quota/internal_managed_forwarding_rules_per_vpc_network/limit"
L7_FORWARDING_RULES_USAGE_METRIC = "compute.googleapis.com/quota/internal_managed_forwarding_rules_per_vpc_network/usage"
SUBNET_RANGES_LIMIT_METRIC = "compute.googleapis.com/quota/subnet_ranges_per_vpc_network/limit" SUBNET_RANGES_LIMIT_METRIC = "compute.googleapis.com/quota/subnet_ranges_per_vpc_network/limit"
SUBNET_RANGES_USAGE_METRIC = "compute.googleapis.com/quota/subnet_ranges_per_vpc_network/usage"
def main(event, context): def main(event, context):
@ -63,7 +50,7 @@ def main(event, context):
Returns: Returns:
'Function executed successfully' 'Function executed successfully'
''' '''
metrics_dict = create_metrics() metrics_dict, limits_dict = create_metrics()
# Asset inventory queries # Asset inventory queries
gce_instance_dict = get_gce_instance_dict() gce_instance_dict = get_gce_instance_dict()
@ -72,28 +59,28 @@ def main(event, context):
subnet_range_dict = get_subnet_ranges_dict() subnet_range_dict = get_subnet_ranges_dict()
# Per Network metrics # Per Network metrics
get_gce_instances_data(metrics_dict, gce_instance_dict) get_gce_instances_data(metrics_dict, gce_instance_dict, limits_dict['number_of_instances_limit'])
get_l4_forwarding_rules_data(metrics_dict, l4_forwarding_rules_dict) get_l4_forwarding_rules_data(metrics_dict, l4_forwarding_rules_dict, limits_dict['internal_forwarding_rules_l4_limit'])
get_vpc_peering_data(metrics_dict) get_vpc_peering_data(metrics_dict, limits_dict['number_of_vpc_peerings_limit'])
get_pgg_data( get_pgg_data(
metrics_dict["metrics_per_peering_group"]["instance_per_peering_group"], metrics_dict["metrics_per_peering_group"]["instance_per_peering_group"],
gce_instance_dict, GCE_INSTANCES_LIMIT_METRIC, LIMIT_INSTANCES_PPG) gce_instance_dict, GCE_INSTANCES_LIMIT_METRIC, limits_dict['number_of_instances_ppg_limit'])
get_pgg_data( get_pgg_data(
metrics_dict["metrics_per_peering_group"] metrics_dict["metrics_per_peering_group"]
["l4_forwarding_rules_per_peering_group"], l4_forwarding_rules_dict, ["l4_forwarding_rules_per_peering_group"], l4_forwarding_rules_dict,
L4_FORWARDING_RULES_LIMIT_METRIC, LIMIT_L4_PPG) L4_FORWARDING_RULES_LIMIT_METRIC, limits_dict['internal_forwarding_rules_l4_ppg_limit'])
get_pgg_data( get_pgg_data(
metrics_dict["metrics_per_peering_group"] metrics_dict["metrics_per_peering_group"]
["l7_forwarding_rules_per_peering_group"], l7_forwarding_rules_dict, ["l7_forwarding_rules_per_peering_group"], l7_forwarding_rules_dict,
L7_FORWARDING_RULES_LIMIT_METRIC, LIMIT_L7_PPG) L7_FORWARDING_RULES_LIMIT_METRIC, limits_dict['internal_forwarding_rules_l7_ppg_limit'])
get_pgg_data( get_pgg_data(
metrics_dict["metrics_per_peering_group"] metrics_dict["metrics_per_peering_group"]
["subnet_ranges_per_peering_group"], subnet_range_dict, ["subnet_ranges_per_peering_group"], subnet_range_dict,
SUBNET_RANGES_LIMIT_METRIC, LIMIT_SUBNETS) SUBNET_RANGES_LIMIT_METRIC, limits_dict['number_of_subnet_IP_ranges_limit'])
return 'Function executed successfully' return 'Function executed successfully'
@ -112,7 +99,7 @@ def get_l4_forwarding_rules_dict():
read_mask = field_mask_pb2.FieldMask() read_mask = field_mask_pb2.FieldMask()
read_mask.FromJsonString('name,versionedResources') read_mask.FromJsonString('name,versionedResources')
forwarding_rules_dict = {} forwarding_rules_dict = defaultdict(int)
response = client.search_all_resources( response = client.search_all_resources(
request={ request={
@ -152,7 +139,7 @@ def get_l7_forwarding_rules_dict():
read_mask = field_mask_pb2.FieldMask() read_mask = field_mask_pb2.FieldMask()
read_mask.FromJsonString('name,versionedResources') read_mask.FromJsonString('name,versionedResources')
forwarding_rules_dict = {} forwarding_rules_dict = defaultdict(int)
response = client.search_all_resources( response = client.search_all_resources(
request={ request={
@ -189,7 +176,7 @@ def get_gce_instance_dict():
''' '''
client = asset_v1.AssetServiceClient() client = asset_v1.AssetServiceClient()
gce_instance_dict = {} gce_instance_dict = defaultdict(int)
response = client.search_all_resources( response = client.search_all_resources(
request={ request={
@ -218,7 +205,7 @@ def get_subnet_ranges_dict():
subnet_range_dict (dictionary of string: int): Keys are the network links and values are the number of subnet ranges per network. subnet_range_dict (dictionary of string: int): Keys are the network links and values are the number of subnet ranges per network.
''' '''
client = asset_v1.AssetServiceClient() client = asset_v1.AssetServiceClient()
subnet_range_dict = {} subnet_range_dict = defaultdict(int)
read_mask = field_mask_pb2.FieldMask() read_mask = field_mask_pb2.FieldMask()
read_mask.FromJsonString('name,versionedResources') read_mask.FromJsonString('name,versionedResources')
@ -280,10 +267,21 @@ def create_client():
def create_metrics(): def create_metrics():
'''
Creates all Cloud Monitoring custom metrics based on the metric.yaml file
Parameters:
None
Returns:
metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions
limits_dict (dictionary of dictionary of string: int): limits_dict[metric_name]: dict[network_name] = limit_value
'''
client = monitoring_v3.MetricServiceClient() client = monitoring_v3.MetricServiceClient()
existing_metrics = [] existing_metrics = []
for desc in client.list_metric_descriptors(name=MONITORING_PROJECT_LINK): for desc in client.list_metric_descriptors(name=MONITORING_PROJECT_LINK):
existing_metrics.append(desc.type) existing_metrics.append(desc.type)
limits_dict = {}
with open("metrics.yaml", 'r') as stream: with open("metrics.yaml", 'r') as stream:
try: try:
@ -291,13 +289,19 @@ def create_metrics():
for metric_list in metrics_dict.values(): for metric_list in metrics_dict.values():
for metric in metric_list.values(): for metric in metric_list.values():
for sub_metric in metric.values(): for sub_metric_key, sub_metric in metric.items():
metric_link = f"custom.googleapis.com/{sub_metric['name']}" metric_link = f"custom.googleapis.com/{sub_metric['name']}"
# If the metric doesn't exist yet, then we create it # If the metric doesn't exist yet, then we create it
if metric_link not in existing_metrics: if metric_link not in existing_metrics:
create_metric(sub_metric["name"], sub_metric["description"]) create_metric(sub_metric["name"], sub_metric["description"])
# Parse limits (both default values and network specific ones)
if sub_metric_key == "limit":
limits_dict_for_metric = {}
for network_link, limit_value in sub_metric["values"].items():
limits_dict_for_metric[network_link] = limit_value
limits_dict[sub_metric["name"]] = limits_dict_for_metric
return metrics_dict return metrics_dict, limits_dict
except yaml.YAMLError as exc: except yaml.YAMLError as exc:
print(exc) print(exc)
@ -325,13 +329,14 @@ def create_metric(metric_name, description):
print("Created {}.".format(descriptor.name)) print("Created {}.".format(descriptor.name))
def get_gce_instances_data(metrics_dict, gce_instance_dict): def get_gce_instances_data(metrics_dict, gce_instance_dict, limit_dict):
''' '''
Gets the data for GCE instances per VPC Network and writes it to the metric defined in instance_metric. Gets the data for GCE instances per VPC Network and writes it to the metric defined in instance_metric.
Parameters: Parameters:
metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions
gce_instance_dict (dictionary of string: int): Keys are the network links and values are the number of GCE Instances per network. gce_instance_dict (dictionary of string: int): Keys are the network links and values are the number of GCE Instances per network.
limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value
Returns: Returns:
gce_instance_dict gce_instance_dict
''' '''
@ -346,9 +351,9 @@ def get_gce_instances_data(metrics_dict, gce_instance_dict):
current_quota_limit_view = customize_quota_view(current_quota_limit) current_quota_limit_view = customize_quota_view(current_quota_limit)
for net in network_dict: for net in network_dict:
set_limits(net, current_quota_limit_view, LIMIT_INSTANCES) set_limits(net, current_quota_limit_view, limit_dict)
network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{net['network name']}" network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{net['network_name']}"
usage = 0 usage = 0
if network_link in gce_instance_dict: if network_link in gce_instance_dict:
@ -356,29 +361,29 @@ def get_gce_instances_data(metrics_dict, gce_instance_dict):
write_data_to_metric( write_data_to_metric(
project, usage, metrics_dict["metrics_per_network"] project, usage, metrics_dict["metrics_per_network"]
["instance_per_network"]["usage"]["name"], net['network name']) ["instance_per_network"]["usage"]["name"], net['network_name'])
write_data_to_metric( write_data_to_metric(
project, net['limit'], metrics_dict["metrics_per_network"] project, net['limit'], metrics_dict["metrics_per_network"]
["instance_per_network"]["limit"]["name"], net['network name']) ["instance_per_network"]["limit"]["name"], net['network_name'])
write_data_to_metric( write_data_to_metric(
project, usage / net['limit'], metrics_dict["metrics_per_network"] project, usage / net['limit'], metrics_dict["metrics_per_network"]
["instance_per_network"]["utilization"]["name"], net['network name']) ["instance_per_network"]["utilization"]["name"], net['network_name'])
print(f"Wrote number of instances to metric for projects/{project}") print(f"Wrote number of instances to metric for projects/{project}")
def get_vpc_peering_data(metrics_dict): def get_vpc_peering_data(metrics_dict, limit_dict):
''' '''
Gets the data for VPC peerings (active or not) and writes it to the metric defined (vpc_peering_active_metric and vpc_peering_metric). Gets the data for VPC peerings (active or not) and writes it to the metric defined (vpc_peering_active_metric and vpc_peering_metric).
Parameters: Parameters:
metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions
limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value
Returns: Returns:
None None
''' '''
for project in MONITORED_PROJECTS_LIST: for project in MONITORED_PROJECTS_LIST:
active_vpc_peerings, vpc_peerings = gather_vpc_peerings_data( active_vpc_peerings, vpc_peerings = gather_vpc_peerings_data(project, limit_dict)
project, LIMIT_VPC_PEER)
for peering in active_vpc_peerings: for peering in active_vpc_peerings:
write_data_to_metric( write_data_to_metric(
project, peering['active_peerings'], project, peering['active_peerings'],
@ -409,13 +414,13 @@ def get_vpc_peering_data(metrics_dict):
print("Wrote number of VPC peerings to custom metric for project:", project) print("Wrote number of VPC peerings to custom metric for project:", project)
def gather_vpc_peerings_data(project_id, limit_list): def gather_vpc_peerings_data(project_id, limit_dict):
''' '''
Gets the data for all VPC peerings (active or not) in project_id and writes it to the metric defined in vpc_peering_active_metric and vpc_peering_metric. Gets the data for all VPC peerings (active or not) in project_id and writes it to the metric defined in vpc_peering_active_metric and vpc_peering_metric.
Parameters: Parameters:
project_id (string): We will take all VPCs in that project_id and look for all peerings to these VPCs. project_id (string): We will take all VPCs in that project_id and look for all peerings to these VPCs.
limit_list (list of string): Used to get the limit per VPC or the default limit. limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value
Returns: Returns:
active_peerings_dict (dictionary of string: string): Contains project_id, network_name, network_limit for each active VPC peering. active_peerings_dict (dictionary of string: string): Contains project_id, network_name, network_limit for each active VPC peering.
peerings_dict (dictionary of string: string): Contains project_id, network_name, network_limit for each VPC peering. peerings_dict (dictionary of string: string): Contains project_id, network_name, network_limit for each VPC peering.
@ -438,68 +443,70 @@ def gather_vpc_peerings_data(project_id, limit_list):
peerings_count = 0 peerings_count = 0
active_peerings_count = 0 active_peerings_count = 0
network_link = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network['name']}"
network_limit = get_limit_ppg(network_link, limit_dict)
active_d = { active_d = {
'project_id': project_id, 'project_id': project_id,
'network_name': network['name'], 'network_name': network['name'],
'active_peerings': active_peerings_count, 'active_peerings': active_peerings_count,
'network_limit': get_limit(network['name'], limit_list) 'network_limit': network_limit
} }
active_peerings_dict.append(active_d) active_peerings_dict.append(active_d)
d = { d = {
'project_id': project_id, 'project_id': project_id,
'network_name': network['name'], 'network_name': network['name'],
'peerings': peerings_count, 'peerings': peerings_count,
'network_limit': get_limit(network['name'], limit_list) 'network_limit': network_limit
} }
peerings_dict.append(d) peerings_dict.append(d)
return active_peerings_dict, peerings_dict return active_peerings_dict, peerings_dict
def get_limit_ppg(network_link, limit_dict):
  '''
    Returns the limit configured for a network, falling back to the default limit.

      Parameters:
        network_link (string): VPC network link, used as the lookup key.
        limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value. May also hold a 'default_value' key with the fallback limit.
      Returns:
        limit (int): limit_dict[network_link] if present, otherwise limit_dict['default_value'] if present, otherwise 0.
  '''
  # A network-specific entry always wins over the default.
  if network_link in limit_dict:
    return limit_dict[network_link]

  if 'default_value' in limit_dict:
    return limit_dict['default_value']

  # Neither a specific nor a default limit is configured: report it and
  # return 0 so the caller still receives a number.
  print(f"Error: limit not found for {network_link}")
  return 0
def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict): def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict, limit_dict):
''' '''
Gets the data for L4 Internal Forwarding Rules per VPC Network and writes it to the metric defined in forwarding_rules_metric. Gets the data for L4 Internal Forwarding Rules per VPC Network and writes it to the metric defined in forwarding_rules_metric.
Parameters: Parameters:
metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions.
forwarding_rules_dict (dictionary of string: int): Keys are the network links and values are the number of Forwarding Rules per network. forwarding_rules_dict (dictionary of string: int): Keys are the network links and values are the number of Forwarding Rules per network.
limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value.
Returns: Returns:
None None
''' '''
# Existing GCP Monitoring metrics for L4 Forwarding Rules
l4_forwarding_rules_limit = "compute.googleapis.com/quota/internal_lb_forwarding_rules_per_vpc_network/limit"
for project in MONITORED_PROJECTS_LIST: for project in MONITORED_PROJECTS_LIST:
network_dict = get_networks(project) network_dict = get_networks(project)
current_quota_limit = get_quota_current_limit(f"projects/{project}", current_quota_limit = get_quota_current_limit(f"projects/{project}",
l4_forwarding_rules_limit) L4_FORWARDING_RULES_LIMIT_METRIC)
current_quota_limit_view = customize_quota_view(current_quota_limit) current_quota_limit_view = customize_quota_view(current_quota_limit)
for net in network_dict: for net in network_dict:
set_limits(net, current_quota_limit_view, LIMIT_L4) set_limits(net, current_quota_limit_view, limit_dict)
network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{net['network name']}" network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{net['network_name']}"
usage = 0 usage = 0
if network_link in forwarding_rules_dict: if network_link in forwarding_rules_dict:
@ -508,21 +515,21 @@ def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict):
write_data_to_metric( write_data_to_metric(
project, usage, metrics_dict["metrics_per_network"] project, usage, metrics_dict["metrics_per_network"]
["l4_forwarding_rules_per_network"]["usage"]["name"], ["l4_forwarding_rules_per_network"]["usage"]["name"],
net['network name']) net['network_name'])
write_data_to_metric( write_data_to_metric(
project, net['limit'], metrics_dict["metrics_per_network"] project, net['limit'], metrics_dict["metrics_per_network"]
["l4_forwarding_rules_per_network"]["limit"]["name"], ["l4_forwarding_rules_per_network"]["limit"]["name"],
net['network name']) net['network_name'])
write_data_to_metric( write_data_to_metric(
project, usage / net['limit'], metrics_dict["metrics_per_network"] project, usage / net['limit'], metrics_dict["metrics_per_network"]
["l4_forwarding_rules_per_network"]["utilization"]["name"], ["l4_forwarding_rules_per_network"]["utilization"]["name"],
net['network name']) net['network_name'])
print( print(
f"Wrote number of L4 forwarding rules to metric for projects/{project}") f"Wrote number of L4 forwarding rules to metric for projects/{project}")
def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg): def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_dict):
''' '''
This function gets the usage, limit and utilization per VPC peering group for a specific metric for all projects to be monitored. This function gets the usage, limit and utilization per VPC peering group for a specific metric for all projects to be monitored.
@ -530,7 +537,7 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg):
metric_dict (dictionary of string: string): Dictionary with the metric names and description, that will be used to populate the metrics metric_dict (dictionary of string: string): Dictionary with the metric names and description, that will be used to populate the metrics
usage_metric (string): Name of the existing GCP metric for usage per VPC network. usage_metric (string): Name of the existing GCP metric for usage per VPC network.
usage_dict (dictionnary of string:int): Dictionary with the network link as key and the number of resources as value usage_dict (dictionnary of string:int): Dictionary with the network link as key and the number of resources as value
limit_ppg (list of string): List containing the limit per peering group (either VPC specific or default limit). limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value
Returns: Returns:
None None
''' '''
@ -544,13 +551,13 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg):
# For each network in this GCP project # For each network in this GCP project
for network_dict in network_dict_list: for network_dict in network_dict_list:
network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network_dict['network_name']}"
current_quota_limit = get_quota_current_limit(f"projects/{project}", current_quota_limit = get_quota_current_limit(f"projects/{project}",
limit_metric) limit_metric)
current_quota_limit_view = customize_quota_view(current_quota_limit) current_quota_limit_view = customize_quota_view(current_quota_limit)
limit = get_limit_values(network_dict, current_quota_limit_view, limit = get_limit_network(network_dict, network_link, current_quota_limit_view,
limit_ppg) limit_dict)
network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network_dict['network_name']}"
usage = 0 usage = 0
if network_link in usage_dict: if network_link in usage_dict:
@ -561,25 +568,25 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg):
network_dict["limit"] = limit network_dict["limit"] = limit
# For every peered network, get usage and limits # For every peered network, get usage and limits
for peered_network in network_dict['peerings']: for peered_network_dict in network_dict['peerings']:
peered_network_link = f"https://www.googleapis.com/compute/v1/projects/{peered_network['project_id']}/global/networks/{peered_network['network_name']}" peered_network_link = f"https://www.googleapis.com/compute/v1/projects/{peered_network_dict['project_id']}/global/networks/{peered_network_dict['network_name']}"
peered_usage = 0 peered_usage = 0
if peered_network_link in usage_dict: if peered_network_link in usage_dict:
peered_usage = usage_dict[peered_network_link] peered_usage = usage_dict[peered_network_link]
peering_project_limit = customize_quota_view( peering_project_limit = customize_quota_view(
get_quota_current_limit(f"projects/{peered_network['project_id']}", get_quota_current_limit(f"projects/{peered_network_dict['project_id']}",
limit_metric)) limit_metric))
peered_limit = get_limit_values(peered_network, peering_project_limit, peered_limit = get_limit_network(peered_network_dict, peered_network_link, peering_project_limit,
limit_ppg) limit_dict)
# Here we add usage and limit to the peered network dictionary # Here we add usage and limit to the peered network dictionary
peered_network["usage"] = peered_usage peered_network_dict["usage"] = peered_usage
peered_network["limit"] = peered_limit peered_network_dict["limit"] = peered_limit
count_effective_limit(project, network_dict, metric_dict["usage"]["name"], count_effective_limit(project, network_dict, metric_dict["usage"]["name"],
metric_dict["limit"]["name"], metric_dict["limit"]["name"],
metric_dict["utilization"]["name"], limit_ppg) metric_dict["utilization"]["name"], limit_dict)
print( print(
f"Wrote {metric_dict['usage']['name']} to metric for peering group {network_dict['network_name']} in {project}" f"Wrote {metric_dict['usage']['name']} to metric for peering group {network_dict['network_name']} in {project}"
) )
@ -587,7 +594,7 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg):
def count_effective_limit(project_id, network_dict, usage_metric_name, def count_effective_limit(project_id, network_dict, usage_metric_name,
limit_metric_name, utilization_metric_name, limit_metric_name, utilization_metric_name,
limit_ppg): limit_dict):
''' '''
Calculates the effective limits (using algorithm in the link below) for peering groups and writes data (usage, limit, utilization) to the custom metrics. Calculates the effective limits (using algorithm in the link below) for peering groups and writes data (usage, limit, utilization) to the custom metrics.
Source: https://cloud.google.com/vpc/docs/quota#vpc-peering-effective-limit Source: https://cloud.google.com/vpc/docs/quota#vpc-peering-effective-limit
@ -598,7 +605,7 @@ def count_effective_limit(project_id, network_dict, usage_metric_name,
usage_metric_name (string): Name of the custom metric to be populated for usage per VPC peering group. usage_metric_name (string): Name of the custom metric to be populated for usage per VPC peering group.
limit_metric_name (string): Name of the custom metric to be populated for limit per VPC peering group. limit_metric_name (string): Name of the custom metric to be populated for limit per VPC peering group.
utilization_metric_name (string): Name of the custom metric to be populated for utilization per VPC peering group. utilization_metric_name (string): Name of the custom metric to be populated for utilization per VPC peering group.
limit_ppg (list of string): List containing the limit per peering group (either VPC specific or default limit). limit_dict (dictionary of string:int): Dictionary containing the limit per peering group (either VPC specific or default limit).
Returns: Returns:
None None
''' '''
@ -611,16 +618,19 @@ def count_effective_limit(project_id, network_dict, usage_metric_name,
for peered_network in network_dict['peerings']: for peered_network in network_dict['peerings']:
peering_group_usage += peered_network['usage'] peering_group_usage += peered_network['usage']
network_link = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network_dict['network_name']}"
# Calculates effective limit: Step 1: max(per network limit, per network_peering_group limit) # Calculates effective limit: Step 1: max(per network limit, per network_peering_group limit)
limit_step1 = max(network_dict['limit'], limit_step1 = max(network_dict['limit'], get_limit_ppg(network_link, limit_dict))
get_limit(network_dict['network_name'], limit_ppg))
# Calculates effective limit: Step 2: List of max(per network limit, per network_peering_group limit) for each peered network # Calculates effective limit: Step 2: List of max(per network limit, per network_peering_group limit) for each peered network
limit_step2 = [] limit_step2 = []
for peered_network in network_dict['peerings']: for peered_network in network_dict['peerings']:
peered_network_link = f"https://www.googleapis.com/compute/v1/projects/{peered_network['project_id']}/global/networks/{peered_network['network_name']}"
limit_step2.append( limit_step2.append(
max(peered_network['limit'], max(peered_network['limit'],
get_limit(peered_network['network_name'], limit_ppg))) get_limit_ppg(peered_network_link, limit_dict)))
# Calculates effective limit: Step 3: Find minimum from the list created by Step 2 # Calculates effective limit: Step 3: Find minimum from the list created by Step 2
limit_step3 = min(limit_step2) limit_step3 = min(limit_step2)
@ -636,7 +646,6 @@ def count_effective_limit(project_id, network_dict, usage_metric_name,
write_data_to_metric(project_id, utilization, utilization_metric_name, write_data_to_metric(project_id, utilization, utilization_metric_name,
network_dict['network_name']) network_dict['network_name'])
def get_networks(project_id): def get_networks(project_id):
''' '''
Returns a dictionary of all networks in a project. Returns a dictionary of all networks in a project.
@ -651,12 +660,32 @@ def get_networks(project_id):
network_dict = [] network_dict = []
if 'items' in response: if 'items' in response:
for network in response['items']: for network in response['items']:
NETWORK = network['name'] network_name = network['name']
ID = network['id'] network_id = network['id']
d = {'project_id': project_id, 'network name': NETWORK, 'network id': ID} d = {'project_id': project_id, 'network_name': network_name, 'network_id': network_id}
network_dict.append(d) network_dict.append(d)
return network_dict return network_dict
# TODO: list all routers (https://cloud.google.com/compute/docs/reference/rest/v1/routers/list) then https://cloud.google.com/compute/docs/reference/rest/v1/routers/getRouterStatus
def get_routes(project_id):
  '''
    Returns the list of all Cloud Routers in a project, across all regions.

    Routers are regional resources, so routers().list() cannot be called with
    only a project; aggregatedList() is used instead and every result page is
    followed.

      Parameters:
        project_id (string): Project ID for the project containing the routers.
      Returns:
        network_dict (list of dictionary of string: string): One entry per router with the keys 'project_id', 'network name' (the router name) and 'network id' (the router id).
  '''
  network_dict = []
  request = service.routers().aggregatedList(project=project_id)

  while request is not None:
    response = request.execute()

    # 'items' maps each scope (e.g. "regions/<region>") to an object that
    # holds a 'routers' list only when that scope actually has routers.
    for scoped_list in response.get('items', {}).values():
      for router in scoped_list.get('routers', []):
        # NOTE(review): keys are kept as 'network name' / 'network id' so the
        # returned shape is unchanged, although the values describe a router.
        network_dict.append({
            'project_id': project_id,
            'network name': router['name'],
            'network id': router['id']
        })

    # Returns None once the last page has been consumed.
    request = service.routers().aggregatedList_next(
        previous_request=request, previous_response=response)

  return network_dict
def gather_peering_data(project_id): def gather_peering_data(project_id):
''' '''
@ -729,30 +758,6 @@ def get_network_id(project_id, network_name):
return network_id return network_id
def get_quota_current_usage(project_link, metric_name):
  '''
    Queries Cloud Monitoring for the time series of a quota usage metric.

      Parameters:
        project_link (string): Project link.
        metric_name (string): Name of the metric.
      Returns:
        results_list (list): Entries returned by list_time_series for that metric, materialised into a list.
  '''
  monitoring_client, time_interval = create_client()

  # Full view of every series whose metric type matches metric_name within
  # the interval supplied by create_client().
  request = {
      "name": project_link,
      "filter": f'metric.type = "{metric_name}"',
      "interval": time_interval,
      "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL
  }
  return list(monitoring_client.list_time_series(request=request))
def get_quota_current_limit(project_link, metric_name): def get_quota_current_limit(project_link, metric_name):
''' '''
Retrieves limit for a specific metric. Retrieves limit for a specific metric.
@ -796,76 +801,64 @@ def customize_quota_view(quota_results):
return quotaViewList return quotaViewList
def set_limits(network_dict, quota_limit, limit_list): def set_limits(network_dict, quota_limit, limit_dict):
''' '''
Updates the network dictionary with quota limit values. Updates the network dictionary with quota limit values.
Parameters: Parameters:
network_dict (dictionary of string: string): Contains network information. network_dict (dictionary of string: string): Contains network information.
quota_limit (list of dictionaries of string: string): Current quota limit. quota_limit (list of dictionaries of string: string): Current quota limit.
limit_list (list of string): List containing the limit per VPC (either VPC specific or default limit). limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value
Returns: Returns:
None None
''' '''
network_dict['limit'] = None
if quota_limit: if quota_limit:
for net in quota_limit: for net in quota_limit:
if net['network_id'] == network_dict[ if net['network_id'] == network_dict[
'network id']: # if network ids in GCP quotas and in dictionary (using API) are the same 'network_id']:
network_dict['limit'] = net['value'] # set network limit in dictionary network_dict['limit'] = net['value']
break return
else:
if network_dict[ network_link = f"https://www.googleapis.com/compute/v1/projects/{network_dict['project_id']}/global/networks/{network_dict['network_name']}"
'network name'] in limit_list: # if network limit is in the environmental variables
network_dict['limit'] = int( if network_link in limit_dict:
limit_list[limit_list.index(network_dict['network name']) + 1]) network_dict['limit'] = limit_dict[network_link]
else: else:
network_dict['limit'] = int( if 'default_value' in limit_dict:
limit_list[limit_list.index('default_value') + network_dict['limit'] = limit_dict['default_value']
1]) # set default value
else: # if quotas does not appear in GCP quotas
if network_dict['network name'] in limit_list:
network_dict['limit'] = int(
limit_list[limit_list.index(network_dict['network name']) +
1]) # ["default", 100, "networkname", 200]
else: else:
network_dict['limit'] = int(limit_list[limit_list.index('default_value') + print(f"Error: Couldn't find limit for {network_link}")
1]) network_dict['limit'] = 0
def get_limit_network(network_dict, network_link, quota_limit, limit_dict):
def get_limit_values(network, quota_limit, limit_list):
''' '''
Returns uslimit for a specific network and metric. Returns limit for a specific network and metric, using the GCP quota metrics or the values in the yaml file if not found.
Parameters: Parameters:
network_dict (dictionary of string: string): Contains network information. network_dict (dictionary of string: string): Contains network information.
network_link (string): Contains network link
quota_limit (list of dictionaries of string: string): Current quota limit for all networks in that project. quota_limit (list of dictionaries of string: string): Current quota limit for all networks in that project.
limit_list (list of string): List containing the limit per VPC (either VPC specific or default limit). limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value
Returns: Returns:
limit (int): Current limit for that network. limit (int): Current limit for that network.
''' '''
limit = 0
if quota_limit: if quota_limit:
for net in quota_limit: for net in quota_limit:
if net['network_id'] == network[ if net['network_id'] == network_dict['network_id']:
'network_id']: # if network ids in GCP quotas and in dictionary (using API) are the same return net['value']
limit = net['value'] # set network limit in dictionary
break
else:
if network[
'network_name'] in limit_list: # if network limit is in the environmental variables
limit = int(limit_list[limit_list.index(network['network_name']) + 1])
else:
limit = int(limit_list[limit_list.index('default_value') +
1]) # set default value
else: # if quotas does not appear in GCP quotas
if network['network_name'] in limit_list:
limit = int(limit_list[limit_list.index(network['network_name']) +
1]) # ["default", 100, "networkname", 200]
else:
limit = int(limit_list[limit_list.index('default_value') + 1])
return limit if network_link in limit_dict:
return limit_dict[network_link]
else:
if 'default_value' in limit_dict:
return limit_dict['default_value']
else:
print(f"Error: Couldn't find limit for {network_link}")
return 0
def write_data_to_metric(monitored_project_id, value, metric_name, def write_data_to_metric(monitored_project_id, value, metric_name,
@ -906,4 +899,9 @@ def write_data_to_metric(monitored_project_id, value, metric_name,
}) })
series.points = [point] series.points = [point]
client.create_time_series(name=MONITORING_PROJECT_LINK, time_series=[series]) # TODO: sometimes this crashes with 'DeadlineExceeded: 504 Deadline expired before operation could complete' error
# Implement exponential backoff retries?
try:
client.create_time_series(name=MONITORING_PROJECT_LINK, time_series=[series])
except Exception as e:
print(e)

View File

@ -22,6 +22,8 @@ metrics_per_network:
limit: limit:
name: number_of_instances_limit name: number_of_instances_limit
description: Number of instances per VPC network - limit. description: Number of instances per VPC network - limit.
values:
default_value: 15000
utilization: utilization:
name: number_of_instances_utilization name: number_of_instances_utilization
description: Number of instances per VPC network - utilization. description: Number of instances per VPC network - utilization.
@ -32,6 +34,8 @@ metrics_per_network:
limit: limit:
name: number_of_active_vpc_peerings_limit name: number_of_active_vpc_peerings_limit
description: Number of active VPC Peerings per VPC - limit. description: Number of active VPC Peerings per VPC - limit.
values:
default_value: 25
utilization: utilization:
name: number_of_active_vpc_peerings_utilization name: number_of_active_vpc_peerings_utilization
description: Number of active VPC Peerings per VPC - utilization. description: Number of active VPC Peerings per VPC - utilization.
@ -42,6 +46,9 @@ metrics_per_network:
limit: limit:
name: number_of_vpc_peerings_limit name: number_of_vpc_peerings_limit
description: Number of VPC Peerings per VPC - limit. description: Number of VPC Peerings per VPC - limit.
values:
default_value: 25
https://www.googleapis.com/compute/v1/projects/net-dash-test-host-prod/global/networks/vpc-prod: 40
utilization: utilization:
name: number_of_vpc_peerings_utilization name: number_of_vpc_peerings_utilization
description: Number of VPC Peerings per VPC - utilization. description: Number of VPC Peerings per VPC - utilization.
@ -52,6 +59,8 @@ metrics_per_network:
limit: limit:
name: internal_forwarding_rules_l4_limit name: internal_forwarding_rules_l4_limit
description: Number of Internal Forwarding Rules for Internal L4 Load Balancers - limit. description: Number of Internal Forwarding Rules for Internal L4 Load Balancers - limit.
values:
default_value: 75
utilization: utilization:
name: internal_forwarding_rules_l4_utilization name: internal_forwarding_rules_l4_utilization
description: Number of Internal Forwarding Rules for Internal L4 Load Balancers - utilization. description: Number of Internal Forwarding Rules for Internal L4 Load Balancers - utilization.
@ -62,6 +71,8 @@ metrics_per_network:
limit: limit:
name: internal_forwarding_rules_l7_limit name: internal_forwarding_rules_l7_limit
description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per network - effective limit. description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per network - effective limit.
values:
default_value: 75
utilization: utilization:
name: internal_forwarding_rules_l7_utilization name: internal_forwarding_rules_l7_utilization
description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per network - utilization. description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per network - utilization.
@ -73,6 +84,8 @@ metrics_per_peering_group:
limit: limit:
name: internal_forwarding_rules_l4_ppg_limit name: internal_forwarding_rules_l4_ppg_limit
description: Number of Internal Forwarding Rules for Internal L4 Load Balancers per VPC peering group - effective limit. description: Number of Internal Forwarding Rules for Internal L4 Load Balancers per VPC peering group - effective limit.
values:
default_value: 175
utilization: utilization:
name: internal_forwarding_rules_l4_ppg_utilization name: internal_forwarding_rules_l4_ppg_utilization
description: Number of Internal Forwarding Rules for Internal L4 Load Balancers per VPC peering group - utilization. description: Number of Internal Forwarding Rules for Internal L4 Load Balancers per VPC peering group - utilization.
@ -83,18 +96,22 @@ metrics_per_peering_group:
limit: limit:
name: internal_forwarding_rules_l7_ppg_limit name: internal_forwarding_rules_l7_ppg_limit
description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per VPC peering group - effective limit. description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per VPC peering group - effective limit.
values:
default_value: 175
utilization: utilization:
name: internal_forwarding_rules_l7_ppg_utilization name: internal_forwarding_rules_l7_ppg_utilization
description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per VPC peering group - utilization. description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per VPC peering group - utilization.
subnet_ranges_per_peering_group: subnet_ranges_per_peering_group:
usage: usage:
name: number_of_subnet_IP_ranges_usage name: number_of_subnet_IP_ranges_ppg_usage
description: Number of Subnet Ranges per peering group - usage. description: Number of Subnet Ranges per peering group - usage.
limit: limit:
name: number_of_subnet_IP_ranges_limit name: number_of_subnet_IP_ranges_ppg_limit
description: Number of Subnet Ranges per peering group - effective limit. description: Number of Subnet Ranges per peering group - effective limit.
values:
default_value: 400
utilization: utilization:
name: number_of_subnet_IP_ranges_utilization name: number_of_subnet_IP_ranges_ppg_utilization
description: Number of Subnet Ranges per peering group - utilization. description: Number of Subnet Ranges per peering group - utilization.
instance_per_peering_group: instance_per_peering_group:
usage: usage:
@ -103,6 +120,8 @@ metrics_per_peering_group:
limit: limit:
name: number_of_instances_ppg_limit name: number_of_instances_ppg_limit
description: Number of instances per peering group - limit. description: Number of instances per peering group - limit.
values:
default_value: 15500
utilization: utilization:
name: number_of_instances_ppg_utilization name: number_of_instances_ppg_utilization
description: Number of instances per peering group - utilization. description: Number of instances per peering group - utilization.

View File

@ -196,7 +196,7 @@
{ {
"height": 4, "height": 4,
"widget": { "widget": {
"title": "number_of_subnet_IP_ranges_utilization", "title": "number_of_subnet_IP_ranges_ppg_utilization",
"xyChart": { "xyChart": {
"chartOptions": { "chartOptions": {
"mode": "COLOR" "mode": "COLOR"
@ -212,7 +212,7 @@
"alignmentPeriod": "3600s", "alignmentPeriod": "3600s",
"perSeriesAligner": "ALIGN_NEXT_OLDER" "perSeriesAligner": "ALIGN_NEXT_OLDER"
}, },
"filter": "metric.type=\"custom.googleapis.com/number_of_subnet_IP_ranges_utilization\" resource.type=\"global\"", "filter": "metric.type=\"custom.googleapis.com/number_of_subnet_IP_ranges_ppg_utilization\" resource.type=\"global\"",
"secondaryAggregation": { "secondaryAggregation": {
"alignmentPeriod": "3600s", "alignmentPeriod": "3600s",
"perSeriesAligner": "ALIGN_MEAN" "perSeriesAligner": "ALIGN_MEAN"

View File

@ -17,23 +17,6 @@
locals { locals {
project_id_list = toset(var.monitored_projects_list) project_id_list = toset(var.monitored_projects_list)
projects = join(",", local.project_id_list) projects = join(",", local.project_id_list)
limit_instances = join(",", local.limit_instances_list)
limit_instances_list = tolist(var.limit_instances)
limit_instances_ppg = join(",", local.limit_instances_ppg_list)
limit_instances_ppg_list = tolist(var.limit_instances_ppg)
limit_l4 = join(",", local.limit_l4_list)
limit_l4_list = tolist(var.limit_l4)
limit_l4_ppg = join(",", local.limit_l4_ppg_list)
limit_l4_ppg_list = tolist(var.limit_l4_ppg)
limit_l7 = join(",", local.limit_l7_list)
limit_l7_list = tolist(var.limit_l7)
limit_l7_ppg = join(",", local.limit_l7_ppg_list)
limit_l7_ppg_list = tolist(var.limit_l7_ppg)
limit_subnets = join(",", local.limit_subnets_list)
limit_subnets_list = tolist(var.limit_subnets)
limit_vpc_peer = join(",", local.limit_vpc_peer_list)
limit_vpc_peer_list = tolist(var.limit_vpc_peer)
} }
################################################ ################################################
@ -130,14 +113,6 @@ module "cloud-function" {
} }
environment_variables = { environment_variables = {
LIMIT_INSTANCES = local.limit_instances
LIMIT_INSTANCES_PPG = local.limit_instances_ppg
LIMIT_L4 = local.limit_l4
LIMIT_L4_PPG = local.limit_l4_ppg
LIMIT_L7 = local.limit_l7
LIMIT_L7_PPG = local.limit_l7_ppg
LIMIT_SUBNETS = local.limit_subnets
LIMIT_VPC_PEER = local.limit_vpc_peer
MONITORED_PROJECTS_LIST = local.projects MONITORED_PROJECTS_LIST = local.projects
MONITORING_PROJECT_ID = module.project-monitoring.project_id MONITORING_PROJECT_ID = module.project-monitoring.project_id
ORGANIZATION_ID = var.organization_id ORGANIZATION_ID = var.organization_id

View File

@ -76,68 +76,3 @@ variable "zone" {
description = "Zone used to deploy vms" description = "Zone used to deploy vms"
default = "europe-west1-b" default = "europe-west1-b"
} }
# Limit on internal L4 (TCP/UDP) load balancer forwarding rules per network.
# Flat list of alternating strings: a VPC network name (or the literal
# "default_value") followed by its limit. The list is comma-joined and handed
# to the Cloud Function as the LIMIT_L4 environment variable.
variable "limit_l4" {
description = "Maximum number of forwarding rules for Internal TCP/UDP Load Balancing per network."
type = list(string)
default = [
"default_value", "75",
]
}
# Limit on internal L7 (HTTP(S)) load balancer forwarding rules per network.
# Flat list of alternating strings: a VPC network name (or the literal
# "default_value") followed by its limit. The list is comma-joined and handed
# to the Cloud Function as the LIMIT_L7 environment variable.
variable "limit_l7" {
description = "Maximum number of forwarding rules for Internal HTTP(S) Load Balancing per network."
type = list(string)
default = [
"default_value", "75",
]
}
# Limit on subnet IP ranges (primary and secondary) per peering group.
# Flat list of alternating strings: a VPC network name (or the literal
# "default_value") followed by its limit. The list is comma-joined and handed
# to the Cloud Function as the LIMIT_SUBNETS environment variable.
variable "limit_subnets" {
description = "Maximum number of subnet IP ranges (primary and secondary) per peering group"
type = list(string)
default = [
"default_value", "400",
]
}
# Limit on the number of instances per network.
# Flat list of alternating strings: a VPC network name (or the literal
# "default_value") followed by its limit. The list is comma-joined and handed
# to the Cloud Function as the LIMIT_INSTANCES environment variable.
variable "limit_instances" {
description = "Maximum number of instances per network"
type = list(string)
default = [
"default_value", "15000",
]
}
# Limit on the number of instances per peering group.
# Flat list of alternating strings: a VPC network name (or the literal
# "default_value") followed by its limit. The list is comma-joined and handed
# to the Cloud Function as the LIMIT_INSTANCES_PPG environment variable.
variable "limit_instances_ppg" {
description = "Maximum number of instances per peering group."
type = list(string)
default = [
"default_value", "15000",
]
}
# Limit on the number of VPC peerings per network.
# Flat list of alternating strings: a VPC network name (or the literal
# "default_value") followed by its limit. The default below sets 25 for every
# network and 40 for the network named "test-vpc". The list is comma-joined
# and handed to the Cloud Function as the LIMIT_VPC_PEER environment variable.
variable "limit_vpc_peer" {
description = "Maximum number of peering VPC peerings per network."
type = list(string)
default = [
"default_value", "25",
"test-vpc", "40",
]
}
# Limit on internal L4 (TCP/UDP) load balancer forwarding rules per VPC
# peering group.
# Flat list of alternating strings: a VPC network name (or the literal
# "default_value") followed by its limit. The list is comma-joined and handed
# to the Cloud Function as the LIMIT_L4_PPG environment variable.
variable "limit_l4_ppg" {
  description = "Maximum number of forwarding rules for Internal TCP/UDP Load Balancing per VPC peering group."
  type        = list(string)
  default = [
    "default_value", "175",
  ]
}
# Limit on internal L7 (HTTP(S)) load balancer forwarding rules per VPC
# peering group.
# Flat list of alternating strings: a VPC network name (or the literal
# "default_value") followed by its limit. The list is comma-joined and handed
# to the Cloud Function as the LIMIT_L7_PPG environment variable.
variable "limit_l7_ppg" {
  description = "Maximum number of forwarding rules for Internal HTTP(S) Load Balancing per VPC peering group."
  type        = list(string)
  default = [
    "default_value", "175",
  ]
}