From 02dc53d0d433d931ca254e6496b488dd5760570f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Legrand?= Date: Mon, 28 Mar 2022 18:44:16 +0200 Subject: [PATCH] Refactored how limits are managed, now you can edit the metrics.yaml file to set specific metrics per network. --- .../network-dashboard/README.md | 11 +- .../network-dashboard/cloud-function/main.py | 302 +++++++++--------- .../cloud-function/metrics.yaml | 25 +- .../dashboards/quotas-utilization.json | 4 +- .../network-dashboard/main.tf | 25 -- .../network-dashboard/variables.tf | 65 ---- 6 files changed, 184 insertions(+), 248 deletions(-) diff --git a/examples/cloud-operations/network-dashboard/README.md b/examples/cloud-operations/network-dashboard/README.md index 3e47fadf..2f958375 100644 --- a/examples/cloud-operations/network-dashboard/README.md +++ b/examples/cloud-operations/network-dashboard/README.md @@ -42,4 +42,13 @@ The Cloud Function currently tracks usage, limit and utilization of: - internal forwarding rules for internal L4 load balancers per VPC peering group - internal forwarding rules for internal L7 load balancers per VPC peering group -It writes this values to custom metrics in Cloud Monitoring and creates a dashboard to visualize the current utilization of these metrics in Cloud Monitoring. \ No newline at end of file +It writes these values to custom metrics in Cloud Monitoring and creates a dashboard to visualize the current utilization of these metrics in Cloud Monitoring. + +## Next steps and ideas +In a future release, we could support: +- Static routes per VPC / per VPC peering group +- Dynamic routes per VPC / per VPC peering group +- Google managed VPCs that are peered with PSA (such as Cloud SQL or Memorystore) +- Subnet IP ranges utilization + +If you are interested in this and/or would like to contribute, please contact legranda@google.com. 
\ No newline at end of file diff --git a/examples/cloud-operations/network-dashboard/cloud-function/main.py b/examples/cloud-operations/network-dashboard/cloud-function/main.py index b374d879..6f7e2135 100644 --- a/examples/cloud-operations/network-dashboard/cloud-function/main.py +++ b/examples/cloud-operations/network-dashboard/cloud-function/main.py @@ -17,6 +17,7 @@ import os import time import yaml +from collections import defaultdict from google.api import metric_pb2 as ga_metric from google.api_core import protobuf_helpers from google.cloud import monitoring_v3, asset_v1 @@ -32,25 +33,11 @@ MONITORING_PROJECT_ID = os.environ.get("MONITORING_PROJECT_ID") MONITORING_PROJECT_LINK = f"projects/{MONITORING_PROJECT_ID}" service = discovery.build('compute', 'v1') -# DEFAULT LIMITS: -LIMIT_INSTANCES = os.environ.get("LIMIT_INSTANCES").split(",") -LIMIT_INSTANCES_PPG = os.environ.get("LIMIT_INSTANCES_PPG").split(",") -LIMIT_L4 = os.environ.get("LIMIT_L4").split(",") -LIMIT_L4_PPG = os.environ.get("LIMIT_L4_PPG").split(",") -LIMIT_L7 = os.environ.get("LIMIT_L7").split(",") -LIMIT_L7_PPG = os.environ.get("LIMIT_L7_PPG").split(",") -LIMIT_SUBNETS = os.environ.get("LIMIT_SUBNETS").split(",") -LIMIT_VPC_PEER = os.environ.get("LIMIT_VPC_PEER").split(",") - # Existing GCP metrics per network GCE_INSTANCES_LIMIT_METRIC = "compute.googleapis.com/quota/instances_per_vpc_network/limit" -GCE_INSTANCES_USAGE_METRIC = "compute.googleapis.com/quota/instances_per_vpc_network/usage" L4_FORWARDING_RULES_LIMIT_METRIC = "compute.googleapis.com/quota/internal_lb_forwarding_rules_per_vpc_network/limit" -L4_FORWARDING_RULES_USAGE_METRIC = "compute.googleapis.com/quota/internal_lb_forwarding_rules_per_vpc_network/usage" L7_FORWARDING_RULES_LIMIT_METRIC = "compute.googleapis.com/quota/internal_managed_forwarding_rules_per_vpc_network/limit" -L7_FORWARDING_RULES_USAGE_METRIC = "compute.googleapis.com/quota/internal_managed_forwarding_rules_per_vpc_network/usage" SUBNET_RANGES_LIMIT_METRIC = 
"compute.googleapis.com/quota/subnet_ranges_per_vpc_network/limit" -SUBNET_RANGES_USAGE_METRIC = "compute.googleapis.com/quota/subnet_ranges_per_vpc_network/usage" def main(event, context): @@ -63,7 +50,7 @@ def main(event, context): Returns: 'Function executed successfully' ''' - metrics_dict = create_metrics() + metrics_dict, limits_dict = create_metrics() # Asset inventory queries gce_instance_dict = get_gce_instance_dict() @@ -72,28 +59,28 @@ def main(event, context): subnet_range_dict = get_subnet_ranges_dict() # Per Network metrics - get_gce_instances_data(metrics_dict, gce_instance_dict) - get_l4_forwarding_rules_data(metrics_dict, l4_forwarding_rules_dict) - get_vpc_peering_data(metrics_dict) + get_gce_instances_data(metrics_dict, gce_instance_dict, limits_dict['number_of_instances_limit']) + get_l4_forwarding_rules_data(metrics_dict, l4_forwarding_rules_dict, limits_dict['internal_forwarding_rules_l4_limit']) + get_vpc_peering_data(metrics_dict, limits_dict['number_of_vpc_peerings_limit']) get_pgg_data( metrics_dict["metrics_per_peering_group"]["instance_per_peering_group"], - gce_instance_dict, GCE_INSTANCES_LIMIT_METRIC, LIMIT_INSTANCES_PPG) + gce_instance_dict, GCE_INSTANCES_LIMIT_METRIC, limits_dict['number_of_instances_ppg_limit']) get_pgg_data( metrics_dict["metrics_per_peering_group"] ["l4_forwarding_rules_per_peering_group"], l4_forwarding_rules_dict, - L4_FORWARDING_RULES_LIMIT_METRIC, LIMIT_L4_PPG) + L4_FORWARDING_RULES_LIMIT_METRIC, limits_dict['internal_forwarding_rules_l4_ppg_limit']) get_pgg_data( metrics_dict["metrics_per_peering_group"] ["l7_forwarding_rules_per_peering_group"], l7_forwarding_rules_dict, - L7_FORWARDING_RULES_LIMIT_METRIC, LIMIT_L7_PPG) + L7_FORWARDING_RULES_LIMIT_METRIC, limits_dict['internal_forwarding_rules_l7_ppg_limit']) get_pgg_data( metrics_dict["metrics_per_peering_group"] ["subnet_ranges_per_peering_group"], subnet_range_dict, - SUBNET_RANGES_LIMIT_METRIC, LIMIT_SUBNETS) + SUBNET_RANGES_LIMIT_METRIC, 
limits_dict['number_of_subnet_IP_ranges_limit']) return 'Function executed successfully' @@ -112,7 +99,7 @@ def get_l4_forwarding_rules_dict(): read_mask = field_mask_pb2.FieldMask() read_mask.FromJsonString('name,versionedResources') - forwarding_rules_dict = {} + forwarding_rules_dict = defaultdict(int) response = client.search_all_resources( request={ @@ -152,7 +139,7 @@ def get_l7_forwarding_rules_dict(): read_mask = field_mask_pb2.FieldMask() read_mask.FromJsonString('name,versionedResources') - forwarding_rules_dict = {} + forwarding_rules_dict = defaultdict(int) response = client.search_all_resources( request={ @@ -189,7 +176,7 @@ def get_gce_instance_dict(): ''' client = asset_v1.AssetServiceClient() - gce_instance_dict = {} + gce_instance_dict = defaultdict(int) response = client.search_all_resources( request={ @@ -218,7 +205,7 @@ def get_subnet_ranges_dict(): subnet_range_dict (dictionary of string: int): Keys are the network links and values are the number of subnet ranges per network. 
''' client = asset_v1.AssetServiceClient() - subnet_range_dict = {} + subnet_range_dict = defaultdict(int) read_mask = field_mask_pb2.FieldMask() read_mask.FromJsonString('name,versionedResources') @@ -280,10 +267,21 @@ def create_client(): def create_metrics(): + ''' + Creates all Cloud Monitoring custom metrics based on the metric.yaml file + + Parameters: + None + + Returns: + metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions + limits_dict (dictionary of dictionary of string: int): limits_dict[metric_name]: dict[network_name] = limit_value + ''' client = monitoring_v3.MetricServiceClient() existing_metrics = [] for desc in client.list_metric_descriptors(name=MONITORING_PROJECT_LINK): existing_metrics.append(desc.type) + limits_dict = {} with open("metrics.yaml", 'r') as stream: try: @@ -291,13 +289,19 @@ def create_metrics(): for metric_list in metrics_dict.values(): for metric in metric_list.values(): - for sub_metric in metric.values(): + for sub_metric_key, sub_metric in metric.items(): metric_link = f"custom.googleapis.com/{sub_metric['name']}" # If the metric doesn't exist yet, then we create it if metric_link not in existing_metrics: create_metric(sub_metric["name"], sub_metric["description"]) - - return metrics_dict + # Parse limits (both default values and network specific ones) + if sub_metric_key == "limit": + limits_dict_for_metric = {} + for network_link, limit_value in sub_metric["values"].items(): + limits_dict_for_metric[network_link] = limit_value + limits_dict[sub_metric["name"]] = limits_dict_for_metric + + return metrics_dict, limits_dict except yaml.YAMLError as exc: print(exc) @@ -325,13 +329,14 @@ def create_metric(metric_name, description): print("Created {}.".format(descriptor.name)) -def get_gce_instances_data(metrics_dict, gce_instance_dict): +def get_gce_instances_data(metrics_dict, gce_instance_dict, limit_dict): ''' Gets the data for GCE instances per VPC Network and writes it to the metric 
defined in instance_metric. Parameters: metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions gce_instance_dict (dictionary of string: int): Keys are the network links and values are the number of GCE Instances per network. + limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value Returns: gce_instance_dict ''' @@ -346,9 +351,9 @@ def get_gce_instances_data(metrics_dict, gce_instance_dict): current_quota_limit_view = customize_quota_view(current_quota_limit) for net in network_dict: - set_limits(net, current_quota_limit_view, LIMIT_INSTANCES) + set_limits(net, current_quota_limit_view, limit_dict) - network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{net['network name']}" + network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{net['network_name']}" usage = 0 if network_link in gce_instance_dict: @@ -356,29 +361,29 @@ def get_gce_instances_data(metrics_dict, gce_instance_dict): write_data_to_metric( project, usage, metrics_dict["metrics_per_network"] - ["instance_per_network"]["usage"]["name"], net['network name']) + ["instance_per_network"]["usage"]["name"], net['network_name']) write_data_to_metric( project, net['limit'], metrics_dict["metrics_per_network"] - ["instance_per_network"]["limit"]["name"], net['network name']) + ["instance_per_network"]["limit"]["name"], net['network_name']) write_data_to_metric( project, usage / net['limit'], metrics_dict["metrics_per_network"] - ["instance_per_network"]["utilization"]["name"], net['network name']) + ["instance_per_network"]["utilization"]["name"], net['network_name']) print(f"Wrote number of instances to metric for projects/{project}") -def get_vpc_peering_data(metrics_dict): +def get_vpc_peering_data(metrics_dict, limit_dict): ''' Gets the data for VPC peerings (active or not) and writes it to the metric defined (vpc_peering_active_metric and 
vpc_peering_metric). Parameters: metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions + limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value Returns: None ''' for project in MONITORED_PROJECTS_LIST: - active_vpc_peerings, vpc_peerings = gather_vpc_peerings_data( - project, LIMIT_VPC_PEER) + active_vpc_peerings, vpc_peerings = gather_vpc_peerings_data(project, limit_dict) for peering in active_vpc_peerings: write_data_to_metric( project, peering['active_peerings'], @@ -409,13 +414,13 @@ def get_vpc_peering_data(metrics_dict): print("Wrote number of VPC peerings to custom metric for project:", project) -def gather_vpc_peerings_data(project_id, limit_list): +def gather_vpc_peerings_data(project_id, limit_dict): ''' Gets the data for all VPC peerings (active or not) in project_id and writes it to the metric defined in vpc_peering_active_metric and vpc_peering_metric. Parameters: project_id (string): We will take all VPCs in that project_id and look for all peerings to these VPCs. - limit_list (list of string): Used to get the limit per VPC or the default limit. + limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value Returns: active_peerings_dict (dictionary of string: string): Contains project_id, network_name, network_limit for each active VPC peering. peerings_dict (dictionary of string: string): Contains project_id, network_name, network_limit for each VPC peering. 
@@ -437,69 +442,71 @@ def gather_vpc_peerings_data(project_id, limit_list): else: peerings_count = 0 active_peerings_count = 0 + + network_link = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network['name']}" + network_limit = get_limit_ppg(network_link, limit_dict) active_d = { 'project_id': project_id, 'network_name': network['name'], 'active_peerings': active_peerings_count, - 'network_limit': get_limit(network['name'], limit_list) + 'network_limit': network_limit } active_peerings_dict.append(active_d) d = { 'project_id': project_id, 'network_name': network['name'], 'peerings': peerings_count, - 'network_limit': get_limit(network['name'], limit_list) + 'network_limit': network_limit } peerings_dict.append(d) return active_peerings_dict, peerings_dict -def get_limit(network_name, limit_list): +def get_limit_ppg(network_link, limit_dict): ''' Checks if this network has a specific limit for a metric, if so, returns that limit, if not, returns the default limit. Parameters: - network_name (string): Name of the VPC network. + network_link (string): VPC network link. limit_list (list of string): Used to get the limit per VPC or the default limit. Returns: - limit (int): Limit for that VPC and that metric. 
+ limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value ''' - if network_name in limit_list: - return int(limit_list[limit_list.index(network_name) + 1]) + if network_link in limit_dict: + return limit_dict[network_link] else: - if 'default_value' in limit_list: - return int(limit_list[limit_list.index('default_value') + 1]) + if 'default_value' in limit_dict: + return limit_dict['default_value'] else: + print(f"Error: limit not found for {network_link}") return 0 -def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict): +def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict, limit_dict): ''' Gets the data for L4 Internal Forwarding Rules per VPC Network and writes it to the metric defined in forwarding_rules_metric. Parameters: - metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions + metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions. forwarding_rules_dict (dictionary of string: int): Keys are the network links and values are the number of Forwarding Rules per network. + limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value. 
Returns: None ''' - # Existing GCP Monitoring metrics for L4 Forwarding Rules - l4_forwarding_rules_limit = "compute.googleapis.com/quota/internal_lb_forwarding_rules_per_vpc_network/limit" - for project in MONITORED_PROJECTS_LIST: network_dict = get_networks(project) current_quota_limit = get_quota_current_limit(f"projects/{project}", - l4_forwarding_rules_limit) + L4_FORWARDING_RULES_LIMIT_METRIC) current_quota_limit_view = customize_quota_view(current_quota_limit) for net in network_dict: - set_limits(net, current_quota_limit_view, LIMIT_L4) + set_limits(net, current_quota_limit_view, limit_dict) - network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{net['network name']}" + network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{net['network_name']}" usage = 0 if network_link in forwarding_rules_dict: @@ -508,21 +515,21 @@ def get_l4_forwarding_rules_data(metrics_dict, forwarding_rules_dict): write_data_to_metric( project, usage, metrics_dict["metrics_per_network"] ["l4_forwarding_rules_per_network"]["usage"]["name"], - net['network name']) + net['network_name']) write_data_to_metric( project, net['limit'], metrics_dict["metrics_per_network"] ["l4_forwarding_rules_per_network"]["limit"]["name"], - net['network name']) + net['network_name']) write_data_to_metric( project, usage / net['limit'], metrics_dict["metrics_per_network"] ["l4_forwarding_rules_per_network"]["utilization"]["name"], - net['network name']) + net['network_name']) print( f"Wrote number of L4 forwarding rules to metric for projects/{project}") -def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg): +def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_dict): ''' This function gets the usage, limit and utilization per VPC peering group for a specific metric for all projects to be monitored. 
@@ -530,7 +537,7 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg): metric_dict (dictionary of string: string): Dictionary with the metric names and description, that will be used to populate the metrics usage_metric (string): Name of the existing GCP metric for usage per VPC network. usage_dict (dictionnary of string:int): Dictionary with the network link as key and the number of resources as value - limit_ppg (list of string): List containing the limit per peering group (either VPC specific or default limit). + limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value Returns: None ''' @@ -544,13 +551,13 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg): # For each network in this GCP project for network_dict in network_dict_list: + network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network_dict['network_name']}" + current_quota_limit = get_quota_current_limit(f"projects/{project}", limit_metric) current_quota_limit_view = customize_quota_view(current_quota_limit) - limit = get_limit_values(network_dict, current_quota_limit_view, - limit_ppg) - - network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network_dict['network_name']}" + limit = get_limit_network(network_dict, network_link, current_quota_limit_view, + limit_dict) usage = 0 if network_link in usage_dict: @@ -561,25 +568,25 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg): network_dict["limit"] = limit # For every peered network, get usage and limits - for peered_network in network_dict['peerings']: - peered_network_link = f"https://www.googleapis.com/compute/v1/projects/{peered_network['project_id']}/global/networks/{peered_network['network_name']}" + for peered_network_dict in network_dict['peerings']: + peered_network_link = 
f"https://www.googleapis.com/compute/v1/projects/{peered_network_dict['project_id']}/global/networks/{peered_network_dict['network_name']}" peered_usage = 0 if peered_network_link in usage_dict: peered_usage = usage_dict[peered_network_link] peering_project_limit = customize_quota_view( - get_quota_current_limit(f"projects/{peered_network['project_id']}", + get_quota_current_limit(f"projects/{peered_network_dict['project_id']}", limit_metric)) - peered_limit = get_limit_values(peered_network, peering_project_limit, - limit_ppg) + peered_limit = get_limit_network(peered_network_dict, peered_network_link, peering_project_limit, + limit_dict) # Here we add usage and limit to the peered network dictionary - peered_network["usage"] = peered_usage - peered_network["limit"] = peered_limit + peered_network_dict["usage"] = peered_usage + peered_network_dict["limit"] = peered_limit count_effective_limit(project, network_dict, metric_dict["usage"]["name"], metric_dict["limit"]["name"], - metric_dict["utilization"]["name"], limit_ppg) + metric_dict["utilization"]["name"], limit_dict) print( f"Wrote {metric_dict['usage']['name']} to metric for peering group {network_dict['network_name']} in {project}" ) @@ -587,7 +594,7 @@ def get_pgg_data(metric_dict, usage_dict, limit_metric, limit_ppg): def count_effective_limit(project_id, network_dict, usage_metric_name, limit_metric_name, utilization_metric_name, - limit_ppg): + limit_dict): ''' Calculates the effective limits (using algorithm in the link below) for peering groups and writes data (usage, limit, utilization) to the custom metrics. Source: https://cloud.google.com/vpc/docs/quota#vpc-peering-effective-limit @@ -598,7 +605,7 @@ def count_effective_limit(project_id, network_dict, usage_metric_name, usage_metric_name (string): Name of the custom metric to be populated for usage per VPC peering group. limit_metric_name (string): Name of the custom metric to be populated for limit per VPC peering group. 
utilization_metric_name (string): Name of the custom metric to be populated for utilization per VPC peering group. - limit_ppg (list of string): List containing the limit per peering group (either VPC specific or default limit). + limit_dict (dictionary of string:int): Dictionary containing the limit per peering group (either VPC specific or default limit). Returns: None ''' @@ -611,16 +618,19 @@ def count_effective_limit(project_id, network_dict, usage_metric_name, for peered_network in network_dict['peerings']: peering_group_usage += peered_network['usage'] + network_link = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network_dict['network_name']}" + # Calculates effective limit: Step 1: max(per network limit, per network_peering_group limit) - limit_step1 = max(network_dict['limit'], - get_limit(network_dict['network_name'], limit_ppg)) + limit_step1 = max(network_dict['limit'], get_limit_ppg(network_link, limit_dict)) # Calculates effective limit: Step 2: List of max(per network limit, per network_peering_group limit) for each peered network limit_step2 = [] for peered_network in network_dict['peerings']: + peered_network_link = f"https://www.googleapis.com/compute/v1/projects/{peered_network['project_id']}/global/networks/{peered_network['network_name']}" + limit_step2.append( max(peered_network['limit'], - get_limit(peered_network['network_name'], limit_ppg))) + get_limit_ppg(peered_network_link, limit_dict))) # Calculates effective limit: Step 3: Find minimum from the list created by Step 2 limit_step3 = min(limit_step2) @@ -636,7 +646,6 @@ def count_effective_limit(project_id, network_dict, usage_metric_name, write_data_to_metric(project_id, utilization, utilization_metric_name, network_dict['network_name']) - def get_networks(project_id): ''' Returns a dictionary of all networks in a project. 
@@ -651,12 +660,32 @@ def get_networks(project_id): network_dict = [] if 'items' in response: for network in response['items']: - NETWORK = network['name'] - ID = network['id'] - d = {'project_id': project_id, 'network name': NETWORK, 'network id': ID} + network_name = network['name'] + network_id = network['id'] + d = {'project_id': project_id, 'network_name': network_name, 'network_id': network_id} network_dict.append(d) return network_dict +# TODO: list all routers (https://cloud.google.com/compute/docs/reference/rest/v1/routers/list) then https://cloud.google.com/compute/docs/reference/rest/v1/routers/getRouterStatus +def get_routes(project_id): + ''' + Returns a dictionary of all dynamic routes in a project. + + Parameters: + project_id (string): Project ID for the project containing the networks. + Returns: + network_dict (dictionary of string: string): Contains the project_id, network_name(s) and network_id(s) + ''' + request = service.routers().list(project=project_id) + response = request.execute() + network_dict = [] + if 'items' in response: + for router in response['items']: + network_name = router['name'] + network_id = router['id'] + d = {'project_id': project_id, 'network name': network_name, 'network id': network_id} + network_dict.append(d) + return network_dict def gather_peering_data(project_id): ''' @@ -729,30 +758,6 @@ def get_network_id(project_id, network_name): return network_id - -def get_quota_current_usage(project_link, metric_name): - ''' - Retrieves quota usage for a specific metric. - - Parameters: - project_link (string): Project link. - metric_name (string): Name of the metric. - Returns: - results_list (list of string): Current usage. 
- ''' - client, interval = create_client() - - results = client.list_time_series( - request={ - "name": project_link, - "filter": f'metric.type = "{metric_name}"', - "interval": interval, - "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL - }) - results_list = list(results) - return (results_list) - - def get_quota_current_limit(project_link, metric_name): ''' Retrieves limit for a specific metric. @@ -796,76 +801,64 @@ def customize_quota_view(quota_results): return quotaViewList -def set_limits(network_dict, quota_limit, limit_list): +def set_limits(network_dict, quota_limit, limit_dict): ''' Updates the network dictionary with quota limit values. Parameters: network_dict (dictionary of string: string): Contains network information. quota_limit (list of dictionaries of string: string): Current quota limit. - limit_list (list of string): List containing the limit per VPC (either VPC specific or default limit). + limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value Returns: None ''' + + network_dict['limit'] = None + if quota_limit: for net in quota_limit: if net['network_id'] == network_dict[ - 'network id']: # if network ids in GCP quotas and in dictionary (using API) are the same - network_dict['limit'] = net['value'] # set network limit in dictionary - break - else: - if network_dict[ - 'network name'] in limit_list: # if network limit is in the environmental variables - network_dict['limit'] = int( - limit_list[limit_list.index(network_dict['network name']) + 1]) - else: - network_dict['limit'] = int( - limit_list[limit_list.index('default_value') + - 1]) # set default value - else: # if quotas does not appear in GCP quotas - if network_dict['network name'] in limit_list: - network_dict['limit'] = int( - limit_list[limit_list.index(network_dict['network name']) + - 1]) # ["default", 100, "networkname", 200] + 'network_id']: + network_dict['limit'] = net['value'] + return + + network_link = 
f"https://www.googleapis.com/compute/v1/projects/{network_dict['project_id']}/global/networks/{network_dict['network_name']}" + + if network_link in limit_dict: + network_dict['limit'] = limit_dict[network_link] + else: + if 'default_value' in limit_dict: + network_dict['limit'] = limit_dict['default_value'] else: - network_dict['limit'] = int(limit_list[limit_list.index('default_value') + - 1]) + print(f"Error: Couldn't find limit for {network_link}") + network_dict['limit'] = 0 - -def get_limit_values(network, quota_limit, limit_list): +def get_limit_network(network_dict, network_link, quota_limit, limit_dict): ''' - Returns uslimit for a specific network and metric. + Returns limit for a specific network and metric, using the GCP quota metrics or the values in the yaml file if not found. Parameters: network_dict (dictionary of string: string): Contains network information. + network_link (string): Contains network link quota_limit (list of dictionaries of string: string): Current quota limit for all networks in that project. - limit_list (list of string): List containing the limit per VPC (either VPC specific or default limit). + limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value Returns: limit (int): Current limit for that network. 
''' - limit = 0 - if quota_limit: for net in quota_limit: - if net['network_id'] == network[ - 'network_id']: # if network ids in GCP quotas and in dictionary (using API) are the same - limit = net['value'] # set network limit in dictionary - break - else: - if network[ - 'network_name'] in limit_list: # if network limit is in the environmental variables - limit = int(limit_list[limit_list.index(network['network_name']) + 1]) - else: - limit = int(limit_list[limit_list.index('default_value') + - 1]) # set default value - else: # if quotas does not appear in GCP quotas - if network['network_name'] in limit_list: - limit = int(limit_list[limit_list.index(network['network_name']) + - 1]) # ["default", 100, "networkname", 200] + if net['network_id'] == network_dict['network_id']: + return net['value'] + + if network_link in limit_dict: + return limit_dict[network_link] + else: + if 'default_value' in limit_dict: + return limit_dict['default_value'] else: - limit = int(limit_list[limit_list.index('default_value') + 1]) + print(f"Error: Couldn't find limit for {network_link}") - return limit + return 0 def write_data_to_metric(monitored_project_id, value, metric_name, @@ -906,4 +899,9 @@ def write_data_to_metric(monitored_project_id, value, metric_name, }) series.points = [point] - client.create_time_series(name=MONITORING_PROJECT_LINK, time_series=[series]) + # TODO: sometimes this crashes with 'DeadlineExceeded: 504 Deadline expired before operation could complete' error + # Implement exponential backoff retries? 
+ try: + client.create_time_series(name=MONITORING_PROJECT_LINK, time_series=[series]) + except Exception as e: + print(e) \ No newline at end of file diff --git a/examples/cloud-operations/network-dashboard/cloud-function/metrics.yaml b/examples/cloud-operations/network-dashboard/cloud-function/metrics.yaml index 233dc9be..a9772b5c 100644 --- a/examples/cloud-operations/network-dashboard/cloud-function/metrics.yaml +++ b/examples/cloud-operations/network-dashboard/cloud-function/metrics.yaml @@ -22,6 +22,8 @@ metrics_per_network: limit: name: number_of_instances_limit description: Number of instances per VPC network - limit. + values: + default_value: 15000 utilization: name: number_of_instances_utilization description: Number of instances per VPC network - utilization. @@ -32,6 +34,8 @@ metrics_per_network: limit: name: number_of_active_vpc_peerings_limit description: Number of active VPC Peerings per VPC - limit. + values: + default_value: 25 utilization: name: number_of_active_vpc_peerings_utilization description: Number of active VPC Peerings per VPC - utilization. @@ -42,6 +46,9 @@ metrics_per_network: limit: name: number_of_vpc_peerings_limit description: Number of VPC Peerings per VPC - limit. + values: + default_value: 25 + https://www.googleapis.com/compute/v1/projects/net-dash-test-host-prod/global/networks/vpc-prod: 40 utilization: name: number_of_vpc_peerings_utilization description: Number of VPC Peerings per VPC - utilization. @@ -52,6 +59,8 @@ metrics_per_network: limit: name: internal_forwarding_rules_l4_limit description: Number of Internal Forwarding Rules for Internal L4 Load Balancers - limit. + values: + default_value: 75 utilization: name: internal_forwarding_rules_l4_utilization description: Number of Internal Forwarding Rules for Internal L4 Load Balancers - utilization. 
@@ -62,6 +71,8 @@ metrics_per_network: limit: name: internal_forwarding_rules_l7_limit description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per network - effective limit. + values: + default_value: 75 utilization: name: internal_forwarding_rules_l7_utilization description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per Vnetwork - utilization. @@ -73,6 +84,8 @@ metrics_per_peering_group: limit: name: internal_forwarding_rules_l4_ppg_limit description: Number of Internal Forwarding Rules for Internal L4 Load Balancers per VPC peering group - effective limit. + values: + default_value: 175 utilization: name: internal_forwarding_rules_l4_ppg_utilization description: Number of Internal Forwarding Rules for Internal L4 Load Balancers per VPC peering group - utilization. @@ -83,18 +96,22 @@ metrics_per_peering_group: limit: name: internal_forwarding_rules_l7_ppg_limit description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per VPC peering group - effective limit. + values: + default_value: 175 utilization: name: internal_forwarding_rules_l7_ppg_utilization description: Number of Internal Forwarding Rules for Internal L7 Load Balancers per VPC peering group - utilization. subnet_ranges_per_peering_group: usage: - name: number_of_subnet_IP_ranges_usage + name: number_of_subnet_IP_ranges_ppg_usage description: Number of Subnet Ranges per peering group - usage. limit: - name: number_of_subnet_IP_ranges_limit + name: number_of_subnet_IP_ranges_ppg_limit description: Number of Subnet Ranges per peering group - effective limit. + values: + default_value: 400 utilization: - name: number_of_subnet_IP_ranges_utilization + name: number_of_subnet_IP_ranges_ppg_utilization description: Number of Subnet Ranges per peering group - utilization. 
instance_per_peering_group: usage: @@ -103,6 +120,8 @@ metrics_per_peering_group: limit: name: number_of_instances_ppg_limit description: Number of instances per peering group - limit. + values: + default_value: 15500 utilization: name: number_of_instances_ppg_utilization description: Number of instances per peering group - utilization. \ No newline at end of file diff --git a/examples/cloud-operations/network-dashboard/dashboards/quotas-utilization.json b/examples/cloud-operations/network-dashboard/dashboards/quotas-utilization.json index af812061..794d6123 100644 --- a/examples/cloud-operations/network-dashboard/dashboards/quotas-utilization.json +++ b/examples/cloud-operations/network-dashboard/dashboards/quotas-utilization.json @@ -196,7 +196,7 @@ { "height": 4, "widget": { - "title": "number_of_subnet_IP_ranges_utilization", + "title": "number_of_subnet_IP_ranges_ppg_utilization", "xyChart": { "chartOptions": { "mode": "COLOR" @@ -212,7 +212,7 @@ "alignmentPeriod": "3600s", "perSeriesAligner": "ALIGN_NEXT_OLDER" }, - "filter": "metric.type=\"custom.googleapis.com/number_of_subnet_IP_ranges_utilization\" resource.type=\"global\"", + "filter": "metric.type=\"custom.googleapis.com/number_of_subnet_IP_ranges_ppg_utilization\" resource.type=\"global\"", "secondaryAggregation": { "alignmentPeriod": "3600s", "perSeriesAligner": "ALIGN_MEAN" diff --git a/examples/cloud-operations/network-dashboard/main.tf b/examples/cloud-operations/network-dashboard/main.tf index 343f3c31..8fb7963c 100644 --- a/examples/cloud-operations/network-dashboard/main.tf +++ b/examples/cloud-operations/network-dashboard/main.tf @@ -17,23 +17,6 @@ locals { project_id_list = toset(var.monitored_projects_list) projects = join(",", local.project_id_list) - - limit_instances = join(",", local.limit_instances_list) - limit_instances_list = tolist(var.limit_instances) - limit_instances_ppg = join(",", local.limit_instances_ppg_list) - limit_instances_ppg_list = tolist(var.limit_instances_ppg) - 
limit_l4 = join(",", local.limit_l4_list) - limit_l4_list = tolist(var.limit_l4) - limit_l4_ppg = join(",", local.limit_l4_ppg_list) - limit_l4_ppg_list = tolist(var.limit_l4_ppg) - limit_l7 = join(",", local.limit_l7_list) - limit_l7_list = tolist(var.limit_l7) - limit_l7_ppg = join(",", local.limit_l7_ppg_list) - limit_l7_ppg_list = tolist(var.limit_l7_ppg) - limit_subnets = join(",", local.limit_subnets_list) - limit_subnets_list = tolist(var.limit_subnets) - limit_vpc_peer = join(",", local.limit_vpc_peer_list) - limit_vpc_peer_list = tolist(var.limit_vpc_peer) } ################################################ @@ -130,14 +113,6 @@ module "cloud-function" { } environment_variables = { - LIMIT_INSTANCES = local.limit_instances - LIMIT_INSTANCES_PPG = local.limit_instances_ppg - LIMIT_L4 = local.limit_l4 - LIMIT_L4_PPG = local.limit_l4_ppg - LIMIT_L7 = local.limit_l7 - LIMIT_L7_PPG = local.limit_l7_ppg - LIMIT_SUBNETS = local.limit_subnets - LIMIT_VPC_PEER = local.limit_vpc_peer MONITORED_PROJECTS_LIST = local.projects MONITORING_PROJECT_ID = module.project-monitoring.project_id ORGANIZATION_ID = var.organization_id diff --git a/examples/cloud-operations/network-dashboard/variables.tf b/examples/cloud-operations/network-dashboard/variables.tf index 7170a513..7e4237ca 100644 --- a/examples/cloud-operations/network-dashboard/variables.tf +++ b/examples/cloud-operations/network-dashboard/variables.tf @@ -75,69 +75,4 @@ variable "region" { variable "zone" { description = "Zone used to deploy vms" default = "europe-west1-b" -} - -variable "limit_l4" { - description = "Maximum number of forwarding rules for Internal TCP/UDP Load Balancing per network." - type = list(string) - default = [ - "default_value", "75", - ] -} - -variable "limit_l7" { - description = "Maximum number of forwarding rules for Internal HTTP(S) Load Balancing per network." 
- type = list(string) - default = [ - "default_value", "75", - ] -} - -variable "limit_subnets" { - description = "Maximum number of subnet IP ranges (primary and secondary) per peering group" - type = list(string) - default = [ - "default_value", "400", - ] -} - -variable "limit_instances" { - description = "Maximum number of instances per network" - type = list(string) - default = [ - "default_value", "15000", - ] -} - -variable "limit_instances_ppg" { - description = "Maximum number of instances per peering group." - type = list(string) - default = [ - "default_value", "15000", - ] -} - -variable "limit_vpc_peer" { - description = "Maximum number of peering VPC peerings per network." - type = list(string) - default = [ - "default_value", "25", - "test-vpc", "40", - ] -} - -variable "limit_l4_ppg" { - description = "Maximum number of forwarding rules for Internal TCP/UDP Load Balancing per network." - type = list(string) - default = [ - "default_value", "175", - ] -} - -variable "limit_l7_ppg" { - description = "Maximum number of forwarding rules for Internal HTTP(S) Load Balancing per network." - type = list(string) - default = [ - "default_value", "175", - ] } \ No newline at end of file