Maunope/network added firewall metrics (#856)

* support for project level VPC firewall metrics

* updated GCP networking dashboard (.json file)

Co-authored-by: Maurizio Noseda Pedraglio <mnoseda@google.com>, Aurélien Legrand <aurelien.legrand01@gmail.com>
This commit is contained in:
maunope 2022-10-10 10:46:22 +02:00 committed by GitHub
parent e13bce1147
commit d5b42d5378
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 315 additions and 60 deletions

View File

@ -9,7 +9,7 @@ Here is an example of dashboard you can get with this solution:
Here you see utilization (usage compared to the limit) for a specific metric (number of instances per VPC) for multiple VPCs and projects.
3 metrics are created: Usage, limit and utilization. You can follow each of these and create alerting policies if a threshold is reached.
Three metric descriptors are created for each monitored resource: usage, limit and utilization. You can follow each of these and create alerting policies if a threshold is reached.
## Usage
@ -45,6 +45,7 @@ The Cloud Function currently tracks usage, limit and utilization of:
- Dynamic routes per VPC
- Dynamic routes per VPC peering group
- IP utilization per subnet (% of IP addresses used in a subnet)
- VPC firewall rules per project (VPC drill down is available for usage)
It writes these values to custom metrics in Cloud Monitoring and creates a dashboard to visualize the current utilization of these metrics in Cloud Monitoring.

View File

@ -20,7 +20,7 @@ import time
from google.cloud import monitoring_v3, asset_v1
from google.protobuf import field_mask_pb2
from googleapiclient import discovery
from metrics import ilb_fwrules, instances, networks, metrics, limits, peerings, routes, subnets
from metrics import ilb_fwrules, instances, networks, metrics, limits, peerings, routes, subnets, vpc_firewalls
def get_monitored_projects_list(config):
@ -33,7 +33,7 @@ def get_monitored_projects_list(config):
monitored_projects (List of strings): Full list of projects to be monitored
'''
monitored_projects = config["monitored_projects"]
monitored_folders = os.environ.get("MONITORED_FOLDERS_LIST").split(",")
monitored_folders = [] #os.environ.get("MONITORED_FOLDERS_LIST").split(",")
# Handling empty monitored folders list
if monitored_folders == ['']:
@ -94,7 +94,7 @@ config = {
# list of projects from which function will get quotas information
"monitored_projects":
os.environ.get("MONITORED_PROJECTS_LIST").split(","),
"monitoring_project_link":
"monitoring_project":
os.environ.get('MONITORING_PROJECT_ID'),
"monitoring_project_link":
f"projects/{os.environ.get('MONITORING_PROJECT_ID')}",
@ -143,6 +143,9 @@ def main(event, context):
metrics_dict, limits_dict = metrics.create_metrics(
config["monitoring_project_link"])
project_quotas_dict = limits.get_quota_project_limit(config)
firewalls_dict = vpc_firewalls.get_firewalls_dict(config)
# IP utilization subnet level metrics
subnets.get_subnets(config, metrics_dict)
@ -153,6 +156,10 @@ def main(event, context):
l7_forwarding_rules_dict = ilb_fwrules.get_forwarding_rules_dict(config, "L7")
subnet_range_dict = networks.get_subnet_ranges_dict(config)
# Per Project metrics
vpc_firewalls.get_firewalls_data(config, metrics_dict, project_quotas_dict,
firewalls_dict)
# Per Network metrics
instances.get_gce_instances_data(config, metrics_dict, gce_instance_dict,
limits_dict['number_of_instances_limit'])
@ -197,4 +204,4 @@ def main(event, context):
if __name__ == "__main__":
main(None, None)
main(None, None)

View File

@ -159,4 +159,16 @@ metrics_per_peering_group:
default_value: 300
utilization:
name: dynamic_routes_per_peering_group_utilization
description: Number of Dynamic routes per peering group - utilization.
description: Number of Dynamic routes per peering group - utilization.
metrics_per_project:
firewalls:
usage:
name: firewalls_per_project_vpc_usage
description: Number of VPC firewall rules in a project - usage.
limit:
# Firewalls limit is per project and we get the limit from the GCP quota API in vpc_firewalls.py
name: firewalls_per_project_limit
description: Number of VPC firewall rules in a project - limit.
utilization:
name: firewalls_per_project_utilization
description: Number of VPC firewall rules in a project - utilization.

View File

@ -19,6 +19,60 @@ from google.cloud import monitoring_v3
from . import metrics
def get_quotas_dict(quotas_list):
  '''
    Creates a dictionary of quotas from a list, with lower case quota name as keys.

    Entries that carry no "metric" key are skipped. The dictionaries in
    quotas_list are not modified: each value in the result is a shallow copy
    of the corresponding entry with its "metric" key removed.

      Parameters:
        quotas_list (list): list of quota dictionaries, each expected to hold the quota name under the "metric" key.
      Returns:
        quotas_dict (dict): quota dictionaries (without their "metric" key), keyed by the lower-cased metric name. If a name appears more than once, the last entry wins.
  '''
  quotas_dict = {}
  for quota in quotas_list:
    if "metric" not in quota:
      # Nothing to key this entry by.
      continue
    # Work on a copy so the caller's data is left untouched.
    details = dict(quota)
    metric_name = details.pop("metric")
    quotas_dict[metric_name.lower()] = details
  return quotas_dict
def get_quota_project_limit(config, regions=["global"]):
  '''
    Retrieves the quotas of every monitored project, for the selected regions.

      Parameters:
        config (dict): The dict containing config like clients and limits. Reads config["monitored_projects"] and config["clients"]["discovery_client"].
        regions (list of strings): Regions to read quotas for. The default, ["global"], reads the project-wide quotas instead of regional ones. The list is only read, never modified.
      Returns:
        quotas (dict): quotas[project][region] is the dictionary built by get_quotas_dict for that project and region (region is 'global' by default). Returns None if a PermissionDenied error is caught while reading quotas.
  '''
  try:
    quotas = dict()
    for project in config["monitored_projects"]:
      quotas[project] = dict()
      if regions != ["global"]:
        for region in regions:
          # Resource collections are reached directly on the discovery client,
          # exactly like projects() in the global branch below.
          request = config["clients"]["discovery_client"].regions().get(
              region=region, project=project)
          response = request.execute()
          quotas[project][region] = get_quotas_dict(response['quotas'])
      else:
        region = "global"
        request = config["clients"]["discovery_client"].projects().get(
            project=project, fields="quotas")
        response = request.execute()
        quotas[project][region] = get_quotas_dict(response['quotas'])

    return quotas
  # NOTE(review): discovery-based clients usually report HTTP failures as
  # googleapiclient.errors.HttpError - confirm this handler is reachable.
  except exceptions.PermissionDenied:
    print(
        f"Warning: error reading quotas for {project}. " +
        f"This can happen if you don't have permissions on the project, for example if the project is in another organization or a Google managed project"
    )
  return None
def get_ppg(network_link, limit_dict):
'''
Checks if this network has a specific limit for a metric, if so, returns that limit, if not, returns the default limit.

View File

@ -36,7 +36,7 @@ def create_metrics(monitoring_project):
existing_metrics.append(desc.type)
limits_dict = {}
with open("metrics.yaml", 'r') as stream:
with open("./metrics.yaml", 'r') as stream:
try:
metrics_dict = yaml.safe_load(stream)
@ -52,8 +52,9 @@ def create_metrics(monitoring_project):
# Subnet level metrics have a different limit: the subnet IP range size
if sub_metric_key == "limit" and metric_name != "ip_usage_per_subnet":
limits_dict_for_metric = {}
for network_link, limit_value in sub_metric["values"].items():
limits_dict_for_metric[network_link] = limit_value
if "values" in sub_metric:
for network_link, limit_value in sub_metric["values"].items():
limits_dict_for_metric[network_link] = limit_value
limits_dict[sub_metric["name"]] = limits_dict_for_metric
return metrics_dict, limits_dict
@ -84,7 +85,7 @@ def create_metric(metric_name, description, monitoring_project):
def write_data_to_metric(config, monitored_project_id, value, metric_name,
network_name, subnet_id=None):
network_name=None, subnet_id=None):
'''
Writes data to Cloud Monitoring custom metrics.
Parameters:
@ -103,9 +104,10 @@ def write_data_to_metric(config, monitored_project_id, value, metric_name,
series = monitoring_v3.TimeSeries()
series.metric.type = f"custom.googleapis.com/{metric_name}"
series.resource.type = "global"
series.metric.labels["network_name"] = network_name
series.metric.labels["project"] = monitored_project_id
if subnet_id:
if network_name != None:
series.metric.labels["network_name"] = network_name
if subnet_id != None:
series.metric.labels["subnet_id"] = subnet_id
now = time.time()

View File

@ -0,0 +1,111 @@
#
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import re
from collections import defaultdict
from pydoc import doc
from collections import defaultdict
from google.protobuf import field_mask_pb2
from . import metrics, networks, limits, peerings, routers
def get_firewalls_dict(config: dict):
  '''
    Calls the Asset Inventory API to get all VPC Firewall Rules under the GCP organization.

    Resources whose name does not contain a "compute.googleapis.com/projects/<id>"
    segment are skipped. Only the first versioned resource of each asset is
    inspected, and a rule is counted only if that version has a "network" field.

      Parameters:
        config (dict): The dict containing config like clients and limits. Reads config["clients"]["asset_client"] and config["organization"].
      Returns:
        firewalls_dict (dictionary of dictionary: int): Keys are projects, subkeys are networks, values count #of VPC Firewall Rules
  '''
  # Raw strings keep the patterns identical while avoiding invalid escape
  # sequences (\w, \-, \d) inside ordinary string literals.
  project_pattern = re.compile(r"(compute.googleapis.com/projects/)([\w\-\d]+)")
  network_pattern = re.compile(r"[a-z0-9\-]*$")

  firewalls_dict = defaultdict(int)
  read_mask = field_mask_pb2.FieldMask()
  read_mask.FromJsonString('name,versionedResources')

  response = config["clients"]["asset_client"].search_all_resources(
      request={
          "scope": f"organizations/{config['organization']}",
          "asset_types": ["compute.googleapis.com/Firewall"],
          "read_mask": read_mask,
      })

  for resource in response:
    project_match = project_pattern.search(resource.name)
    if project_match is None:
      # Without a project id the rule cannot be attributed to any project.
      continue
    project_id = project_match.group(2)

    for versioned in resource.versioned_resources:
      for field_name, field_value in versioned.resource.items():
        if field_name == "network":
          # The network name is the last segment of the network link.
          network_name = network_pattern.search(field_value).group(0)
          if project_id not in firewalls_dict:
            firewalls_dict[project_id] = defaultdict(int)
          firewalls_dict[project_id][network_name] += 1
          break
      # Only the first versioned resource is taken into account.
      break
  return firewalls_dict
def get_firewalls_data(config, metrics_dict, project_quotas_dict,
                       firewalls_dict):
  '''
    Writes VPC Firewall Rules usage, limit and utilization metrics for every monitored project.

    Usage is written once per network of the project (with the network name
    attached); limit and utilization are written once per project. Projects
    whose "firewalls" quota cannot be found are reported and skipped.

      Parameters:
        config (dict): The dict containing config like clients and limits
        metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions.
        project_quotas_dict (dictionary of dictionary): Quotas indexed as [project]['global'][quota name]; the "firewalls" entry must expose a 'limit' value. May be None or incomplete.
        firewalls_dict (dictionary of dictionary): Keys are projects, subkeys are networks, values count #of VPC Firewall Rules
      Returns:
        None
  '''
  firewall_metrics = metrics_dict["metrics_per_project"]["firewalls"]

  for project in config["monitored_projects"]:
    # A missing project/region/quota entry (or a missing quotas dictionary)
    # must reach the "missing quotas" branch instead of raising.
    try:
      current_quota_limit = project_quotas_dict[project]['global']["firewalls"]
    except (KeyError, TypeError):
      current_quota_limit = None

    if current_quota_limit is None:
      print(
          f"Could not write VPC firewal rules to metric for projects/{project} due to missing quotas"
      )
      continue

    network_dict = networks.get_networks(config, project)

    project_usage = 0
    for net in network_dict:
      usage = 0
      if project in firewalls_dict and net['network_name'] in firewalls_dict[
          project]:
        usage = firewalls_dict[project][net['network_name']]
        project_usage += usage
      metrics.write_data_to_metric(config, project, usage,
                                   firewall_metrics["usage"]["name"],
                                   net['network_name'])

    # firewall quotas are per project, not per single VPC
    limit = current_quota_limit['limit']
    metrics.write_data_to_metric(config, project, limit,
                                 firewall_metrics["limit"]["name"])
    # A zero limit would divide by zero; utilization is reported as 0 then.
    utilization = project_usage / limit if limit != 0 else 0
    metrics.write_data_to_metric(config, project, utilization,
                                 firewall_metrics["utilization"]["name"])
    print(
        f"Wrote number of VPC Firewall Rules to metric for projects/{project}")

View File

@ -1,4 +1,5 @@
{
"category": "CUSTOM",
"displayName": "quotas_utilization",
"mosaicLayout": {
"columns": 12,
@ -17,6 +18,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "3600s",
@ -38,7 +40,9 @@
}
}
},
"width": 6
"width": 6,
"xPos": 0,
"yPos": 0
},
{
"height": 4,
@ -54,6 +58,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "3600s",
@ -76,6 +81,7 @@
}
},
"width": 6,
"xPos": 0,
"yPos": 12
},
{
@ -92,6 +98,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "3600s",
@ -114,6 +121,7 @@
}
},
"width": 6,
"xPos": 0,
"yPos": 8
},
{
@ -130,6 +138,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "3600s",
@ -169,6 +178,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "3600s",
@ -191,6 +201,7 @@
}
},
"width": 6,
"xPos": 0,
"yPos": 4
},
{
@ -207,6 +218,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "3600s",
@ -229,6 +241,7 @@
}
},
"width": 6,
"xPos": 0,
"yPos": 16
},
{
@ -245,6 +258,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "3600s",
@ -267,7 +281,8 @@
}
},
"width": 6,
"xPos": 6
"xPos": 6,
"yPos": 0
},
{
"height": 4,
@ -283,6 +298,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "3600s",
@ -322,6 +338,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "3600s",
@ -357,6 +374,7 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
@ -375,6 +393,97 @@
}
},
"width": 6,
"xPos": 6,
"yPos": 16
},
{
"height": 4,
"widget": {
"title": "firewalls_per_project_vpc_usage",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_SUM",
"groupByFields": [
"metric.label.\"project\""
],
"perSeriesAligner": "ALIGN_MEAN"
},
"filter": "metric.type=\"custom.googleapis.com/firewalls_per_project_vpc_usage\" resource.type=\"global\"",
"secondaryAggregation": {
"alignmentPeriod": "60s",
"perSeriesAligner": "ALIGN_NONE"
}
}
}
}
],
"thresholds": [],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
"width": 6,
"xPos": 0,
"yPos": 20
},
{
"height": 4,
"widget": {
"title": "firewalls_per_project_utilization",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_MAX",
"groupByFields": [
"metric.label.\"project\""
],
"perSeriesAligner": "ALIGN_MAX"
},
"filter": "metric.type=\"custom.googleapis.com/firewalls_per_project_utilization\" resource.type=\"global\"",
"secondaryAggregation": {
"alignmentPeriod": "60s",
"perSeriesAligner": "ALIGN_NONE"
}
}
}
}
],
"thresholds": [],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
"width": 6,
"xPos": 6,
"yPos": 20
},
{
@ -391,15 +500,13 @@
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"perSeriesAligner": "ALIGN_MEAN"
},
"filter": "metric.type=\"custom.googleapis.com/ip_addresses_per_subnet_utilization\" resource.type=\"global\"",
"secondaryAggregation": {
"alignmentPeriod": "60s"
}
"filter": "metric.type=\"custom.googleapis.com/ip_addresses_per_subnet_utilization\" resource.type=\"global\""
}
}
}
@ -412,48 +519,9 @@
}
},
"width": 6,
"xPos": 6,
"yPos": 16
},
{
"height": 4,
"widget": {
"title": "dynamic_routes_ppg_utilization",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"perSeriesAligner": "ALIGN_MEAN"
},
"filter": "metric.type=\"custom.googleapis.com/dynamic_routes_per_peering_group_utilization\" resource.type=\"global\"",
"secondaryAggregation": {
"alignmentPeriod": "60s"
}
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
"width": 6,
"xPos": 6,
"yPos": 20
"xPos": 0,
"yPos": 24
}
]
},
"name": "projects/347834224817/dashboards/1bdcd06a-030d-4977-bf4b-f32231aa3b77"
}
}