#! /usr/bin/env python3
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
"""Sync GCE quota usage to Stackdriver for multiple projects.

This tool fetches global and/or regional quotas from the GCE API for
multiple projects, and sends them to Stackdriver as custom metrics, where they
can be used to set alert policies or create charts.
"""
|
|
|
|
import base64
|
|
import datetime
|
|
import json
|
|
import logging
|
|
import os
|
|
import time
|
|
import warnings
|
|
|
|
import click
|
|
|
|
from google.api_core.exceptions import GoogleAPIError
|
|
from google.api import label_pb2 as ga_label
|
|
from google.api import metric_pb2 as ga_metric
|
|
from google.cloud import monitoring_v3
|
|
|
|
import googleapiclient.discovery
|
|
import googleapiclient.errors
|
|
|
|
# Number of quotas converted and written per monitoring API request; every
# quota expands to three time series (usage, limit, utilization).
_BATCH_SIZE = 5
# Gauge metric kind taken from the metric descriptor protobuf enum.
_METRIC_KIND = ga_metric.MetricDescriptor.MetricKind.GAUGE
# Prefix shared by every custom metric type written by this tool.
_METRIC_TYPE_STEM = 'custom.googleapis.com/quota/'

# Suffixes appended to the per-quota metric type prefix.
_USAGE = 'usage'
_LIMIT = 'limit'
_UTILIZATION = 'utilization'
|
|
|
|
|
|
def _add_series(project_id, series, client=None):
    """Write metrics series to Stackdriver.

    Args:
      project_id: series will be written to this project id's account
      series: the time series to be written, either a single
        monitoring_v3.types.TimeSeries instance or a list of them
      client: optional monitoring_v3.MetricServiceClient that will be used
        instead of obtaining a new one

    Raises:
      RuntimeError: if the monitoring API call fails; the underlying
        GoogleAPIError is attached as the exception's cause.
    """
    client = client or monitoring_v3.MetricServiceClient()
    project_name = client.common_project_path(project_id)
    # A bare TimeSeries is wrapped so the API call always receives a list.
    if isinstance(series, monitoring_v3.types.TimeSeries):
        series = [series]
    try:
        client.create_time_series(name=project_name, time_series=series)
    except GoogleAPIError as e:
        # Chain explicitly so the API error shows up as the direct cause.
        raise RuntimeError('Error from monitoring API: %s' % e) from e
|
|
|
|
|
|
def _configure_logging(verbose=True):
    """Basic logging configuration.

    Sets the root logger level and silences the UserWarning whose message
    matches 'end user credentials'.

    Args:
      verbose: log at DEBUG level when true, at INFO level otherwise
    """
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(level=level)
    # basicConfig does nothing when the root logger already has handlers
    # (a repeated call, or a host process that configured logging first), so
    # set the level explicitly to make sure `verbose` is always honored.
    logging.getLogger().setLevel(level)
    warnings.filterwarnings('ignore', r'.*end user credentials.*', UserWarning)
|
|
|
|
|
|
def _fetch_quotas(project, region='global', compute=None):
    """Fetch GCE per-project or per-region quotas from the API.

    Args:
      project: fetch global or regional quotas for this project id
      region: which quotas to fetch, 'global' or region name
      compute: optional instance of googleapiclient.discovery.build that will
        be used instead of obtaining a new one

    Returns:
      The 'quotas' entry of the API response.

    Raises:
      RuntimeError: if the API request fails; the underlying API error is
        attached as the exception's cause and logged at DEBUG level.
    """
    # Build the fallback client the same way _main builds its client.
    compute = compute or googleapiclient.discovery.build(
        'compute', 'v1', cache_discovery=False)
    try:
        if region != 'global':
            req = compute.regions().get(project=project, region=region)
        else:
            req = compute.projects().get(project=project)
        resp = req.execute()
        return resp['quotas']
    except (GoogleAPIError, googleapiclient.errors.HttpError) as e:
        logging.debug('API Error: %s', e, exc_info=True)
        raise RuntimeError('Error fetching quota (project: %s, region: %s)' %
                           (project, region)) from e
|
|
|
|
|
|
def _get_series(metric_labels, value, metric_type, timestamp, dt=None):
    """Build a single-point Stackdriver time series from value and labels.

    Args:
      metric_labels: dict with labels that are copied onto the series metric
      value: time series value, stored as the point's double value
      metric_type: which metric is this series for
      timestamp: numeric end time of the point; the integer part becomes the
        interval's seconds and the fractional part its nanos (the caller in
        this file passes a time.time() value)
      dt: not read by this function

    Returns:
      A monitoring_v3.types.TimeSeries on the 'global' resource holding one
      point.
    """
    whole_seconds = int(timestamp)
    end_time = {
        'seconds': whole_seconds,
        'nanos': int((timestamp - whole_seconds) * 10**9),
    }

    point = monitoring_v3.types.Point()
    point.value.double_value = value
    point.interval = monitoring_v3.TimeInterval({'end_time': end_time})

    series = monitoring_v3.types.TimeSeries()
    series.metric.type = metric_type
    series.resource.type = 'global'
    for name, label_value in metric_labels.items():
        series.metric.labels[name] = label_value
    series.points.append(point)
    return series
|
|
|
|
|
|
def _quota_to_series_triplet(project, region, quota):
    """Convert an API quota object to usage, limit and utilization series.

    Args:
      project: set in converted time series labels
      region: set in converted time series labels
      quota: quota object received from the GCE API; its 'usage', 'limit'
        and 'metric' keys are read

    Returns:
      A list of three time series (usage, limit, utilization, in that order)
      that share the same labels and timestamp.
    """
    usage, limit = quota['usage'], quota['limit']
    try:
        utilization = usage / float(limit)
    except ZeroDivisionError:
        # A zero limit is reported as zero utilization.
        utilization = 0
    now = time.time()
    type_prefix = '%s%s_' % (_METRIC_TYPE_STEM, quota['metric'].lower())
    labels = {'project': project, 'region': region}
    samples = (
        (_USAGE, usage),
        (_LIMIT, limit),
        (_UTILIZATION, utilization),
    )
    return [
        _get_series(labels, sample, type_prefix + suffix, now)
        for suffix, sample in samples
    ]
|
|
|
|
|
|
@click.command()
@click.option('--monitoring-project', required=True,
              help='monitoring project id')
@click.option('--gce-project', multiple=True,
              help='project ids (multiple), defaults to monitoring project')
@click.option('--gce-region', multiple=True,
              help='regions (multiple), defaults to "global"')
@click.option('--verbose', is_flag=True, help='Verbose output')
@click.argument('keywords', nargs=-1)
def main_cli(monitoring_project=None, gce_project=None, gce_region=None,
             verbose=False, keywords=None):
    """Fetch GCE quotas and writes them as custom metrics to Stackdriver.

    If KEYWORDS are specified as arguments, only quotas matching one of the
    keywords will be stored in Stackdriver.
    """
    # Command line wrapper around _main: a RuntimeError is logged with its
    # traceback instead of propagating to click.
    try:
        _main(monitoring_project, gce_project=gce_project,
              gce_region=gce_region, verbose=verbose, keywords=keywords)
    except RuntimeError:
        logging.exception('exception raised')
|
|
|
|
|
|
def main(event, context):
    """Cloud Function entry point.

    Args:
      event: mapping whose 'data' entry is a base64-encoded, UTF-8 JSON
        object; its keys are passed to _main as keyword arguments
      context: not used

    The monitoring project id is read from the GCP_PROJECT environment
    variable. Any exception is logged and swallowed.
    """
    try:
        payload = base64.b64decode(event['data']).decode('utf-8')
        options = json.loads(payload)
        _main(os.environ.get('GCP_PROJECT'), **options)
    # uncomment once https://issuetracker.google.com/issues/155215191 is fixed
    # except RuntimeError:
    #   raise
    except Exception:
        logging.exception('exception in cloud function entry point')
|
|
|
|
|
|
def _main(monitoring_project, gce_project=None, gce_region=None, verbose=False,
          keywords=None):
    """Module entry point used by cli and cloud function wrappers.

    Fetches quotas for every project/region combination, keeps those whose
    metric name contains one of the keywords (all of them when no keywords
    are given), and writes them to the monitoring project in batches.

    Args:
      monitoring_project: project id the time series are written to
      gce_project: iterable of project ids to fetch quotas for, defaults to
        the monitoring project
      gce_region: iterable of regions to fetch quotas for, defaults to
        'global'
      verbose: enable verbose logging
      keywords: iterable of substrings matched against each quota's metric
        name

    Raises:
      RuntimeError: propagated from _fetch_quotas or _add_series.
    """
    _configure_logging(verbose=verbose)
    gce_projects = gce_project or [monitoring_project]
    gce_regions = gce_region or ['global']
    keywords = set(keywords or [])
    logging.debug('monitoring project %s', monitoring_project)
    logging.debug('projects %s regions %s', gce_projects, gce_regions)
    logging.debug('keywords %s', keywords)
    quotas = []
    # One compute client is shared by all fetches.
    compute = googleapiclient.discovery.build('compute', 'v1',
                                              cache_discovery=False)
    for project in gce_projects:
        logging.debug('project %s', project)
        for region in gce_regions:
            logging.debug('region %s', region)
            for quota in _fetch_quotas(project, region, compute=compute):
                if keywords and not any(k in quota['metric'] for k in keywords):
                    continue
                logging.debug('quota %s', quota)
                quotas.append((project, region, quota))
    client = monitoring_v3.MetricServiceClient()
    # Write _BATCH_SIZE quotas (three series each) per API request.
    for start in range(0, len(quotas), _BATCH_SIZE):
        batch = quotas[start:start + _BATCH_SIZE]
        series = [s for q in batch for s in _quota_to_series_triplet(*q)]
        _add_series(monitoring_project, series, client)
|
|
|
|
|
|
# Run the click command only when the file is executed as a script; importing
# the module has no side effects beyond its definitions.
if __name__ == '__main__':
    main_cli()
|