2022-04-28 02:48:48 -07:00
|
|
|
#
|
|
|
|
# Copyright 2022 Google LLC
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
#
|
|
|
|
|
2022-10-10 06:53:14 -07:00
|
|
|
from curses import KEY_MARK
|
|
|
|
import re
|
2022-04-28 02:48:48 -07:00
|
|
|
import time
|
|
|
|
import yaml
|
|
|
|
from google.api import metric_pb2 as ga_metric
|
|
|
|
from google.cloud import monitoring_v3
|
|
|
|
from . import peerings, limits, networks
|
|
|
|
|
2022-10-10 06:53:14 -07:00
|
|
|
|
2022-10-19 09:59:28 -07:00
|
|
|
def create_metrics(monitoring_project, config):
  '''
    Creates all Cloud Monitoring custom metrics based on the metrics.yaml file.

      Parameters:
        monitoring_project (string): the project where the metrics are written to
        config (dict): The dict containing config like clients and limits
      Returns:
        metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions
        limits_dict (dictionary of dictionary of string: int): limits_dict[metric_name]: dict[network_name] = limit_value
        (None is returned implicitly if the YAML file cannot be parsed)
  '''
  client = config["clients"]["monitoring_client"]

  # Collect the descriptor types already present in the monitoring project so
  # we only create the metrics that are missing.
  existing_metrics = []
  for desc in client.list_metric_descriptors(name=monitoring_project):
    existing_metrics.append(desc.type)

  # Keep the try body minimal: only the YAML parsing can raise YAMLError.
  # On parse failure we log and bail out (returning None), as before.
  try:
    with open("./metrics.yaml", 'r') as stream:
      metrics_dict = yaml.safe_load(stream)
  except yaml.YAMLError as exc:
    print(exc)
    return None

  limits_dict = {}
  for metric_list in metrics_dict.values():
    for metric_name, metric in metric_list.items():
      for sub_metric_key, sub_metric in metric.items():
        metric_link = f"custom.googleapis.com/{sub_metric['name']}"
        # If the metric doesn't exist yet, then we create it
        if metric_link not in existing_metrics:
          create_metric(sub_metric["name"], sub_metric["description"],
                        monitoring_project, config)

        # Parse limits for network and peering group metrics
        # Subnet level metrics have a different limit: the subnet IP range size
        if sub_metric_key == "limit" and metric_name != "ip_usage_per_subnet":
          # Copy the per-network limit overrides (may be absent -> empty dict).
          limits_dict[sub_metric["name"]] = dict(sub_metric.get("values", {}))

  return metrics_dict, limits_dict
|
|
|
|
|
|
|
|
|
2022-10-19 09:59:28 -07:00
|
|
|
def create_metric(metric_name, description, monitoring_project, config):
  '''
    Creates a Cloud Monitoring custom metric descriptor with the given name
    and description, as a global GAUGE of DOUBLE values.

      Parameters:
        metric_name (string): Name of the metric to be created
        description (string): Description of the metric to be created
        monitoring_project (string): the project where the metrics are written to
        config (dict): The dict containing config like clients and limits
      Returns:
        None
  '''
  client = config["clients"]["monitoring_client"]

  # Build the descriptor under the custom metrics namespace.
  new_descriptor = ga_metric.MetricDescriptor()
  new_descriptor.type = f"custom.googleapis.com/{metric_name}"
  new_descriptor.metric_kind = ga_metric.MetricDescriptor.MetricKind.GAUGE
  new_descriptor.value_type = ga_metric.MetricDescriptor.ValueType.DOUBLE
  new_descriptor.description = description

  # Register it with Cloud Monitoring and log the server-assigned name.
  created = client.create_metric_descriptor(name=monitoring_project,
                                            metric_descriptor=new_descriptor)
  print("Created {}.".format(created.name))
|
|
|
|
|
|
|
|
|
2022-10-10 06:53:14 -07:00
|
|
|
def append_data_to_series_buffer(config, metric_name, metric_value,
                                 metric_labels, timestamp=None):
  '''
    Appends data to Cloud Monitoring custom metrics, using a buffer. buffer is flushed every BUFFER_LEN elements,
    any unflushed series is discarded upon function closure

      Parameters:
        config (dict): The dict containing config like clients and limits
        metric_name (string): Name of the metric
        metric_value (int): Value for the data point of the metric.
        metric_labels (dictionary of string: string): metric labels names and values
        timestamp (float): seconds since the epoch, in UTC; defaults to the current time
      Returns:
        None
  '''

  # Configurable buffer size to improve performance when writing datapoints to metrics
  buffer_len = 10

  series = monitoring_v3.TimeSeries()
  series.metric.type = f"custom.googleapis.com/{metric_name}"
  series.resource.type = "global"

  # Only propagate labels that actually carry a value.
  for label_name, label_value in metric_labels.items():
    if label_value is not None:
      series.metric.labels[label_name] = label_value

  # Default to "now", then split into the seconds/nanos pair the API expects.
  timestamp = timestamp if timestamp is not None else time.time()
  seconds = int(timestamp)
  nanos = int((timestamp - seconds) * 10**9)
  interval = monitoring_v3.TimeInterval(
      {"end_time": {
          "seconds": seconds,
          "nanos": nanos
      }})
  point = monitoring_v3.Point({
      "interval": interval,
      "value": {
          "double_value": metric_value
      }
  })
  series.points = [point]

  # TODO: sometimes this crashes with 'DeadlineExceeded: 504 Deadline expired before operation could complete' error
  # Implement exponential backoff retries?
  config["series_buffer"].append(series)
  if len(config["series_buffer"]) >= buffer_len:
    flush_series_buffer(config)
|
|
|
|
|
|
|
|
|
|
|
|
def flush_series_buffer(config):
  '''
    writes buffered metrics to Google Cloud Monitoring, empties buffer upon both failure/success

      Parameters:
        config (dict): The dict containing config like clients and limits
      Returns:
        None
  '''
  try:
    if config["series_buffer"]:
      client = config["clients"]["monitoring_client"]
      client.create_time_series(name=config["monitoring_project_link"],
                                time_series=config["series_buffer"])
      # Log only the metric names (the part after the first '/').
      # Raw string avoids the invalid '\/' escape of the original pattern.
      series_names = [
          re.search(r"/(.+$)", series.metric.type).group(1)
          for series in config["series_buffer"]
      ]
      print("Wrote time series: ", series_names)
  except Exception as e:
    # Best-effort: a failed write is logged but must not abort the run.
    print("Error while flushing series buffer")
    print(e)

  # Always drop the buffered series, whether or not the write succeeded.
  config["series_buffer"] = []
|
|
|
|
|
2022-04-28 02:48:48 -07:00
|
|
|
|
|
|
|
def get_pgg_data(config, metric_dict, usage_dict, limit_metric, limit_dict):
  '''
    This function gets the usage, limit and utilization per VPC peering group for a specific metric for all projects to be monitored.

      Parameters:
        config (dict): The dict containing config like clients and limits
        metric_dict (dictionary of string: string): Dictionary with the metric names and description, that will be used to populate the metrics
        usage_dict (dictionnary of string:int): Dictionary with the network link as key and the number of resources as value
        limit_metric (string): Name of the existing GCP metric for limit per VPC network
        limit_dict (dictionary of string:int): Dictionary with the network link as key and the limit as value
      Returns:
        None
  '''
  for project_id in config["monitored_projects"]:
    network_dict_list = peerings.gather_peering_data(config, project_id)
    # Network dict list is a list of dictionary (one for each network)
    # For each network, this dictionary contains:
    #   project_id, network_name, network_id, usage, limit, peerings (list of peered networks)
    #   peerings is a list of dictionary (one for each peered network) and contains:
    #     project_id, network_name, network_id
    current_quota_limit = limits.get_quota_current_limit(
        config, f"projects/{project_id}", limit_metric)
    if current_quota_limit is None:
      # Fixed message: this function is generic over limit_metric, it is not
      # specific to L7 forwarding rules as the old message claimed.
      print(
          f"Could not determine limit {limit_metric} for projects/{project_id} due to missing quotas"
      )
      continue

    current_quota_limit_view = customize_quota_view(current_quota_limit)

    # One timestamp per project so all datapoints of this pass line up.
    timestamp = time.time()
    # For each network in this GCP project
    for network_dict in network_dict_list:
      # network_id == 0 signals the network could not be resolved.
      if network_dict['network_id'] == 0:
        print(
            f"Could not determine {metric_dict['usage']['name']} for peering group {network_dict['network_name']} in {project_id} due to missing permissions."
        )
        continue
      network_link = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network_dict['network_name']}"

      limit = networks.get_limit_network(network_dict, network_link,
                                         current_quota_limit_view, limit_dict)

      # Networks absent from usage_dict have no resources, i.e. usage 0.
      usage = 0
      if network_link in usage_dict:
        usage = usage_dict[network_link]

      # Here we add usage and limit to the network dictionary
      network_dict["usage"] = usage
      network_dict["limit"] = limit

      # For every peered network, get usage and limits
      for peered_network_dict in network_dict['peerings']:
        peered_network_link = f"https://www.googleapis.com/compute/v1/projects/{peered_network_dict['project_id']}/global/networks/{peered_network_dict['network_name']}"
        peered_usage = 0
        if peered_network_link in usage_dict:
          peered_usage = usage_dict[peered_network_link]

        current_peered_quota_limit = limits.get_quota_current_limit(
            config, f"projects/{peered_network_dict['project_id']}",
            limit_metric)
        if current_peered_quota_limit is None:
          print(
              f"Could not determine metrics for peering to projects/{peered_network_dict['project_id']} due to missing quotas"
          )
          continue

        peering_project_limit = customize_quota_view(current_peered_quota_limit)

        peered_limit = networks.get_limit_network(peered_network_dict,
                                                  peered_network_link,
                                                  peering_project_limit,
                                                  limit_dict)
        # Here we add usage and limit to the peered network dictionary
        peered_network_dict["usage"] = peered_usage
        peered_network_dict["limit"] = peered_limit

      # Aggregate the peering group and buffer usage/limit/utilization series.
      limits.count_effective_limit(config, project_id, network_dict,
                                   metric_dict["usage"]["name"],
                                   metric_dict["limit"]["name"],
                                   metric_dict["utilization"]["name"],
                                   limit_dict, timestamp)
      print(
          f"Buffered {metric_dict['usage']['name']} for peering group {network_dict['network_name']} in {project_id}"
      )
|
|
|
|
|
|
|
|
|
|
|
|
def customize_quota_view(quota_results):
  '''
    Customize the quota output for an easier parsable output.

      Parameters:
        quota_results (string): Input from get_quota_current_usage or get_quota_current_limit. Contains the Current usage or limit for all networks in that project.
      Returns:
        quotaViewList (list of dictionaries of string: string): Current quota usage or limit.
  '''
  quotaViewList = []
  for result in quota_results:
    # Merge resource and metric labels into one flat view.
    view = {**dict(result.resource.labels), **dict(result.metric.labels)}
    # Keep the value of the last point, matching the original behavior.
    for point in result.points:
      view['value'] = point.value.int64_value
    quotaViewList.append(view)
  return quotaViewList
|