Maunope/network added firewall metrics (#856)

* support for project level VPC firewall metrics * updated GCP networking dashboard (.json file) Co-authored-by: Maurizio Noseda Pedraglio <mnoseda@google.com>, Aurélien Legrand <aurelien.legrand01@gmail.com>
2022-10-10 10:46:22 +02:00 · 2022-10-10 10:46:22 +02:00 · d5b42d5378
parent e13bce1147
commit d5b42d5378
7 changed files with 315 additions and 60 deletions
--- a/blueprints/cloud-operations/network-dashboard/README.md
+++ b/blueprints/cloud-operations/network-dashboard/README.md
@ -9,7 +9,7 @@ Here is an example of dashboard you can get with this solution:

 Here you see utilization (usage compared to the limit) for a specific metric (number of instances per VPC) for multiple VPCs and projects.

-3 metrics are created: Usage, limit and utilization. You can follow each of these and create alerting policies if a threshold is reached.
+Three metric descriptors are created for each monitored resource: usage, limit and utilization. You can follow each of these and create alerting policies if a threshold is reached.

 ## Usage

@ -45,6 +45,7 @@ The Cloud Function currently tracks usage, limit and utilization of:
 - Dynamic routes per VPC
 - Dynamic routes per VPC peering group
 - IP utilization per subnet (% of IP addresses used in a subnet)
+- VPC firewall rules per project (VPC drill down is available for usage)

 It writes these values to custom metrics in Cloud Monitoring and creates a dashboard to visualize the current utilization of these metrics in Cloud Monitoring.

--- a/blueprints/cloud-operations/network-dashboard/cloud-function/main.py
+++ b/blueprints/cloud-operations/network-dashboard/cloud-function/main.py
@ -20,7 +20,7 @@ import time
 from google.cloud import monitoring_v3, asset_v1
 from google.protobuf import field_mask_pb2
 from googleapiclient import discovery
-from metrics import ilb_fwrules, instances, networks, metrics, limits, peerings, routes, subnets
+from metrics import ilb_fwrules, instances, networks, metrics, limits, peerings, routes, subnets, vpc_firewalls


 def get_monitored_projects_list(config):
@ -33,7 +33,7 @@ def get_monitored_projects_list(config):
        monitored_projects (List of strings): Full list of projects to be monitored
    '''
  monitored_projects = config["monitored_projects"]
-  monitored_folders = os.environ.get("MONITORED_FOLDERS_LIST").split(",")
+  monitored_folders = []  #os.environ.get("MONITORED_FOLDERS_LIST").split(",")

  # Handling empty monitored folders list
  if monitored_folders == ['']:
@ -94,7 +94,7 @@ config = {
    # list of projects from which function will get quotas information
    "monitored_projects":
        os.environ.get("MONITORED_PROJECTS_LIST").split(","),
-    "monitoring_project_link":
+    "monitoring_project":
        os.environ.get('MONITORING_PROJECT_ID'),
    "monitoring_project_link":
        f"projects/{os.environ.get('MONITORING_PROJECT_ID')}",
@ -143,6 +143,9 @@ def main(event, context):

  metrics_dict, limits_dict = metrics.create_metrics(
      config["monitoring_project_link"])
+  project_quotas_dict = limits.get_quota_project_limit(config)
+
+  firewalls_dict = vpc_firewalls.get_firewalls_dict(config)

  # IP utilization subnet level metrics
  subnets.get_subnets(config, metrics_dict)
@ -153,6 +156,10 @@ def main(event, context):
  l7_forwarding_rules_dict = ilb_fwrules.get_forwarding_rules_dict(config, "L7")
  subnet_range_dict = networks.get_subnet_ranges_dict(config)

+  # Per Project metrics
+  vpc_firewalls.get_firewalls_data(config, metrics_dict, project_quotas_dict,
+                                   firewalls_dict)
+
  # Per Network metrics
  instances.get_gce_instances_data(config, metrics_dict, gce_instance_dict,
                                   limits_dict['number_of_instances_limit'])
@ -197,4 +204,4 @@ def main(event, context):


 if __name__ == "__main__":
-  main(None, None)
+  main(None, None)
--- a/blueprints/cloud-operations/network-dashboard/cloud-function/metrics.yaml
+++ b/blueprints/cloud-operations/network-dashboard/cloud-function/metrics.yaml
@ -159,4 +159,16 @@ metrics_per_peering_group:
        default_value: 300
    utilization:
      name: dynamic_routes_per_peering_group_utilization
-      description: Number of Dynamic routes per peering group - utilization.
+      description: Number of Dynamic routes per peering group - utilization.
+metrics_per_project:
+  firewalls:
+    usage:
+      name: firewalls_per_project_vpc_usage
+      description: Number of VPC firewall rules in a project - usage.
+    limit:
+      # Firewalls limit is per project: it is read from the GCP quota API (see limits.py), not configured here
+      name: firewalls_per_project_limit
+      description: Number of VPC firewall rules in a project - limit.
+    utilization:
+      name: firewalls_per_project_utilization
+      description: Number of VPC firewall rules in a project - utilization.
--- a/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/limits.py
+++ b/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/limits.py
@ -19,6 +19,60 @@ from google.cloud import monitoring_v3
 from . import metrics


+def get_quotas_dict(quotas_list):
+  '''
+    Creates a dictionary of quotas from a list, with lower case quota name as keys.
+    The input list and its entries are left untouched (entries are copied).
+      Parameters:
+        quotas_list (list of dict): quota entries, each carrying a 'metric' key
+          (the quota name) next to its other fields, e.g. 'limit'
+      Returns:
+        quotas_dict (dict): lower case quota name -> entry without its 'metric' key
+      Raises:
+        KeyError: if an entry has no 'metric' key
+  '''
+  quotas_dict = dict()
+  for quota in quotas_list:
+    fields = dict(quota)  # shallow copy, so the caller's entry keeps 'metric'
+    quotas_dict[fields.pop('metric').lower()] = fields
+  return quotas_dict
+
+
+def get_quota_project_limit(config, regions=["global"]):
+  '''
+    Retrieves the quotas of every monitored project through the discovery client.
+      Parameters:
+        config (dict): The dict containing config like clients and limits
+        regions (list of string): regions to read quotas for, default 'global'
+      Returns:
+        quotas (dict): project -> region -> quotas, as built by get_quotas_dict.
+          A project whose quotas cannot be read (permission denied) maps to
+          an empty dict; the other projects are still returned.
+  '''
+  compute = config["clients"]["discovery_client"]
+  quotas = dict()
+  for project in config["monitored_projects"]:
+    quotas[project] = dict()
+    try:  # per project, so one failure does not discard the other projects
+      if regions != ["global"]:
+        for region in regions:
+          # regions() is called on the client itself, like projects() below
+          response = compute.regions().get(region=region,
+                                           project=project).execute()
+          quotas[project][region] = get_quotas_dict(response['quotas'])
+      else:
+        response = compute.projects().get(project=project,
+                                          fields="quotas").execute()
+        quotas[project]["global"] = get_quotas_dict(response['quotas'])
+    except exceptions.PermissionDenied:
+      # NOTE(review): confirm the discovery client raises this exception type
+      print(
+          f"Warning: error reading quotas for {project}. " +
+          f"This can happen if you don't have permissions on the project, for example if the project is in another organization or a Google managed project"
+      )
+  return quotas
+
+
 def get_ppg(network_link, limit_dict):
  '''
    Checks if this network has a specific limit for a metric, if so, returns that limit, if not, returns the default limit.
--- a/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/metrics.py
+++ b/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/metrics.py
@ -36,7 +36,7 @@ def create_metrics(monitoring_project):
    existing_metrics.append(desc.type)
  limits_dict = {}

-  with open("metrics.yaml", 'r') as stream:
+  with open("./metrics.yaml", 'r') as stream:
    try:
      metrics_dict = yaml.safe_load(stream)

@ -52,8 +52,9 @@ def create_metrics(monitoring_project):
            # Subnet level metrics have a different limit: the subnet IP range size
            if sub_metric_key == "limit" and metric_name != "ip_usage_per_subnet":
              limits_dict_for_metric = {}
-              for network_link, limit_value in sub_metric["values"].items():
-                limits_dict_for_metric[network_link] = limit_value
+              if "values" in sub_metric:
+                for network_link, limit_value in sub_metric["values"].items():
+                  limits_dict_for_metric[network_link] = limit_value
              limits_dict[sub_metric["name"]] = limits_dict_for_metric

      return metrics_dict, limits_dict
@ -84,7 +85,7 @@ def create_metric(metric_name, description, monitoring_project):


 def write_data_to_metric(config, monitored_project_id, value, metric_name,
-                         network_name, subnet_id=None):
+                         network_name=None, subnet_id=None):
  '''
    Writes data to Cloud Monitoring custom metrics.
      Parameters:
@ -103,9 +104,10 @@ def write_data_to_metric(config, monitored_project_id, value, metric_name,
  series = monitoring_v3.TimeSeries()
  series.metric.type = f"custom.googleapis.com/{metric_name}"
  series.resource.type = "global"
-  series.metric.labels["network_name"] = network_name
  series.metric.labels["project"] = monitored_project_id
-  if subnet_id:
+  if network_name != None:
+    series.metric.labels["network_name"] = network_name
+  if subnet_id != None:
    series.metric.labels["subnet_id"] = subnet_id

  now = time.time()
--- a/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/vpc_firewalls.py
+++ b/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/vpc_firewalls.py
@ -0,0 +1,111 @@
+#
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import re
+from collections import defaultdict
+from pydoc import doc
+from collections import defaultdict
+from google.protobuf import field_mask_pb2
+from . import metrics, networks, limits, peerings, routers
+
+
+def get_firewalls_dict(config: dict):
+  '''
+    Calls the Asset Inventory API to get all VPC Firewall Rules under the GCP organization.
+
+      Parameters:
+        config (dict): The dict containing config like clients and limits
+      Returns:
+        firewalls_dict (dictionary of dictionary: int): Keys are projects, subkeys are networks, values count #of VPC Firewall Rules
+  '''
+
+  # Nested counters: a missing project or network starts at 0 on first use
+  firewalls_dict = defaultdict(lambda: defaultdict(int))
+  read_mask = field_mask_pb2.FieldMask()
+  read_mask.FromJsonString('name,versionedResources')
+
+  response = config["clients"]["asset_client"].search_all_resources(
+      request={
+          "scope": f"organizations/{config['organization']}",
+          "asset_types": ["compute.googleapis.com/Firewall"],
+          "read_mask": read_mask,
+      })
+  for resource in response:
+    # Raw strings keep the regex escapes from being read as string escapes
+    project_match = re.search(
+        r"(compute.googleapis.com/projects/)([\w\-\d]+)", resource.name)
+    if project_match is None:
+      continue  # name carries no project segment, so there is nothing to count
+    project_id = project_match.group(2)
+    for versioned in resource.versioned_resources:
+      for field_name, field_value in versioned.resource.items():
+        if field_name == "network":
+          # The rule is counted under the last segment of its network link
+          network_name = re.search(r"[a-z0-9\-]*$", field_value).group(0)
+          firewalls_dict[project_id][network_name] += 1
+          break
+      break  # only the first versioned resource is inspected
+  return firewalls_dict
+
+
+def get_firewalls_data(config, metrics_dict, project_quotas_dict,
+                       firewalls_dict):
+  '''
+    Writes the VPC Firewall Rules metrics: usage per VPC network, limit and utilization per project (metrics_per_project / firewalls).
+    Projects without a 'firewalls' quota under 'global' are skipped with a log message.
+
+      Parameters:
+        config (dict): The dict containing config like clients and limits
+        metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions.
+        project_quotas_dict (dictionary of dictionary): Keys are projects, subkeys are regions, values are quotas by lower case name. May be None.
+        firewalls_dict (dictionary of dictionary): Keys are projects, subkeys are networks, values count #of VPC Firewall Rules
+      Returns:
+        None
+  '''
+  for project in config["monitored_projects"]:
+
+    # .get() chain: a missing project, region or quota gives None (no KeyError)
+    project_quotas = (project_quotas_dict or {}).get(project) or {}
+    current_quota_limit = project_quotas.get('global', {}).get("firewalls")
+    if current_quota_limit is None:
+      print(
+          f"Could not write VPC firewal rules to metric for projects/{project} due to missing quotas"
+      )
+      continue
+
+    firewall_metrics = metrics_dict["metrics_per_project"]["firewalls"]
+    network_dict = networks.get_networks(config, project)
+
+    project_usage = 0
+    for net in network_dict:
+      # Networks without firewall rules still report a usage of 0
+      usage = firewalls_dict.get(project, {}).get(net['network_name'], 0)
+      project_usage += usage
+      metrics.write_data_to_metric(config, project, usage,
+                                   firewall_metrics["usage"]["name"],
+                                   net['network_name'])
+
+    # firewall quotas are per project, not per single VPC
+    limit = current_quota_limit['limit']
+    metrics.write_data_to_metric(config, project, limit,
+                                 firewall_metrics["limit"]["name"])
+    # A limit of 0 cannot be divided by: report a utilization of 0 instead
+    utilization = project_usage / limit if limit != 0 else 0
+    metrics.write_data_to_metric(config, project, utilization,
+                                 firewall_metrics["utilization"]["name"])
+
+    print(
+        f"Wrote number of VPC Firewall Rules to metric for projects/{project}")
--- a/blueprints/cloud-operations/network-dashboard/dashboards/quotas-utilization.json
+++ b/blueprints/cloud-operations/network-dashboard/dashboards/quotas-utilization.json
@ -1,4 +1,5 @@
 {
+  "category": "CUSTOM",
  "displayName": "quotas_utilization",
  "mosaicLayout": {
    "columns": 12,
@ -17,6 +18,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "3600s",
@ -38,7 +40,9 @@
            }
          }
        },
-        "width": 6
+        "width": 6,
+        "xPos": 0,
+        "yPos": 0
      },
      {
        "height": 4,
@ -54,6 +58,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "3600s",
@ -76,6 +81,7 @@
          }
        },
        "width": 6,
+        "xPos": 0,
        "yPos": 12
      },
      {
@ -92,6 +98,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "3600s",
@ -114,6 +121,7 @@
          }
        },
        "width": 6,
+        "xPos": 0,
        "yPos": 8
      },
      {
@ -130,6 +138,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "3600s",
@ -169,6 +178,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "3600s",
@ -191,6 +201,7 @@
          }
        },
        "width": 6,
+        "xPos": 0,
        "yPos": 4
      },
      {
@ -207,6 +218,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "3600s",
@ -229,6 +241,7 @@
          }
        },
        "width": 6,
+        "xPos": 0,
        "yPos": 16
      },
      {
@ -245,6 +258,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "3600s",
@ -267,7 +281,8 @@
          }
        },
        "width": 6,
-        "xPos": 6
+        "xPos": 6,
+        "yPos": 0
      },
      {
        "height": 4,
@ -283,6 +298,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "3600s",
@ -322,6 +338,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "3600s",
@ -357,6 +374,7 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "60s",
@ -375,6 +393,97 @@
          }
        },
        "width": 6,
+        "xPos": 6,
+        "yPos": 16
+      },
+      {
+        "height": 4,
+        "widget": {
+          "title": "firewalls_per_project_vpc_usage",
+          "xyChart": {
+            "chartOptions": {
+              "mode": "COLOR"
+            },
+            "dataSets": [
+              {
+                "minAlignmentPeriod": "60s",
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
+                  "timeSeriesFilter": {
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "metric.label.\"project\""
+                      ],
+                      "perSeriesAligner": "ALIGN_MEAN"
+                    },
+                    "filter": "metric.type=\"custom.googleapis.com/firewalls_per_project_vpc_usage\" resource.type=\"global\"",
+                    "secondaryAggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_NONE"
+                    }
+                  }
+                }
+              }
+            ],
+            "thresholds": [],
+            "timeshiftDuration": "0s",
+            "yAxis": {
+              "label": "y1Axis",
+              "scale": "LINEAR"
+            }
+          }
+        },
+        "width": 6,
+        "xPos": 0,
+        "yPos": 20
+      },
+      {
+        "height": 4,
+        "widget": {
+          "title": "firewalls_per_project_utilization",
+          "xyChart": {
+            "chartOptions": {
+              "mode": "COLOR"
+            },
+            "dataSets": [
+              {
+                "minAlignmentPeriod": "60s",
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
+                  "timeSeriesFilter": {
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "crossSeriesReducer": "REDUCE_MAX",
+                      "groupByFields": [
+                        "metric.label.\"project\""
+                      ],
+                      "perSeriesAligner": "ALIGN_MAX"
+                    },
+                    "filter": "metric.type=\"custom.googleapis.com/firewalls_per_project_utilization\" resource.type=\"global\"",
+                    "secondaryAggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_NONE"
+                    }
+                  }
+                }
+              }
+            ],
+            "thresholds": [],
+            "timeshiftDuration": "0s",
+            "yAxis": {
+              "label": "y1Axis",
+              "scale": "LINEAR"
+            }
+          }
+        },
+        "width": 6,
+        "xPos": 6,
        "yPos": 20
      },
      {
@ -391,15 +500,13 @@
                "plotType": "LINE",
                "targetAxis": "Y1",
                "timeSeriesQuery": {
+                  "apiSource": "DEFAULT_CLOUD",
                  "timeSeriesFilter": {
                    "aggregation": {
                      "alignmentPeriod": "60s",
                      "perSeriesAligner": "ALIGN_MEAN"
                    },
-                    "filter": "metric.type=\"custom.googleapis.com/ip_addresses_per_subnet_utilization\" resource.type=\"global\"",
-                    "secondaryAggregation": {
-                      "alignmentPeriod": "60s"
-                    }
+                    "filter": "metric.type=\"custom.googleapis.com/ip_addresses_per_subnet_utilization\" resource.type=\"global\""
                  }
                }
              }
@ -412,48 +519,9 @@
          }
        },
        "width": 6,
-        "xPos": 6,
-        "yPos": 16
-      },
-      {
-        "height": 4,
-        "widget": {
-          "title": "dynamic_routes_ppg_utilization",
-          "xyChart": {
-            "chartOptions": {
-              "mode": "COLOR"
-            },
-            "dataSets": [
-              {
-                "minAlignmentPeriod": "60s",
-                "plotType": "LINE",
-                "targetAxis": "Y1",
-                "timeSeriesQuery": {
-                  "timeSeriesFilter": {
-                    "aggregation": {
-                      "alignmentPeriod": "60s",
-                      "perSeriesAligner": "ALIGN_MEAN"
-                    },
-                    "filter": "metric.type=\"custom.googleapis.com/dynamic_routes_per_peering_group_utilization\" resource.type=\"global\"",
-                    "secondaryAggregation": {
-                      "alignmentPeriod": "60s"
-                    }
-                  }
-                }
-              }
-            ],
-            "timeshiftDuration": "0s",
-            "yAxis": {
-              "label": "y1Axis",
-              "scale": "LINEAR"
-            }
-          }
-        },
-        "width": 6,
-        "xPos": 6,
-        "yPos": 20
+        "xPos": 0,
+        "yPos": 24
      }
    ]
-  },
-  "name": "projects/347834224817/dashboards/1bdcd06a-030d-4977-bf4b-f32231aa3b77"
+  }
 }