diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/README.md b/cloud-operations/scheduled-asset-inventory-export-bq/README.md
index 4c9d2ae7..f1fabbe8 100644
--- a/cloud-operations/scheduled-asset-inventory-export-bq/README.md
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/README.md
@@ -36,7 +36,21 @@ Once done testing, you can clean up resources by running `terraform destroy`. To
 Once resources are created, you can run queries on the data you exported on Bigquery. [Here](https://cloud.google.com/asset-inventory/docs/exporting-to-bigquery#querying_an_asset_snapshot) you can find some examples of queries you can run.
 
-You can also create a dashboard connecting [Datalab](https://datastudio.google.com/) or any other BI tools of your choice to your Bigquery datase.
+You can also create a dashboard connecting [Datalab](https://datastudio.google.com/) or any other BI tool of your choice to your Bigquery dataset.
+
+## File exporter for JSON and CSV (optional)
+
+This optional part is only created when `cai_gcs_export` is set to `true`.
+
+Regular file-based exports of Cloud Asset Inventory data are useful for scale-out network dependency discovery tools like [Planet Exporter](https://github.com/williamchanrico/planet-exporter), or for feeding legacy workload-tracking and configuration management systems. Bigquery supports multiple [export formats](https://cloud.google.com/bigquery/docs/exporting-data#export_formats_and_compression_types), and the provided Cloud Function uploads the exported objects to a Storage bucket. Specify the desired `job.DestinationFormat` as defined in the [documentation](https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.job.DestinationFormat.html), e.g. `NEWLINE_DELIMITED_JSON`.
+
+It also helps to create a custom [scheduled query](https://cloud.google.com/bigquery/docs/scheduling-queries#console) over the CAI export tables that writes its results to a dedicated table, overwriting it on each run. Define the query's output columns to match the field requirements of downstream systems, and schedule it to run right after the CAI export into BQ so that the exported file stays fresh. See these [sample queries](https://cloud.google.com/asset-inventory/docs/exporting-to-bigquery-sample-queries).
+
+The high-level diagram extends to the following:
+
+
+
@@ -44,12 +58,16 @@ You can also create a dashboard connecting [Datalab](https://datastudio.google.c
 | name | description | type | required | default |
 |---|---|:---:|:---:|:---:|
-| cai_config | Cloud Asset inventory export config. | object({…}) | ✓ | |
+| cai_config | Cloud Asset Inventory export config. | object({…}) | ✓ | |
 | project_id | Project id that references existing project. | string | ✓ | |
 | billing_account | Billing account id used as default for new projects. | string | | null |
 | bundle_path | Path used to write the intermediate Cloud Function code bundle. | string | | "./bundle.zip" |
+| bundle_path_cffile | Path used to write the intermediate Cloud Function code bundle. | string | | "./bundle_cffile.zip" |
+| cai_gcs_export | Enable optional part to export tables to GCS. | bool | | false |
+| file_config | Optional BQ table as a file export function config. | object({…}) | | {…} |
 | location | App Engine location used in the example. | string | | "europe-west" |
 | name | Arbitrary string used to name created resources. | string | | "asset-inventory" |
| string | | "cffile-exporter" | | project_create | Create project instead ofusing an existing one. | bool | | true | | region | Compute region used in the example. | string | | "europe-west1" | | root_node | The resource name of the parent folder or organization for project creation, in 'folders/folder_id' or 'organizations/org_id' format. | string | | null | @@ -63,3 +81,4 @@ You can also create a dashboard connecting [Datalab](https://datastudio.google.c + diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/cf/main.py b/cloud-operations/scheduled-asset-inventory-export-bq/cf/main.py index ad97c326..9f9cfb3f 100755 --- a/cloud-operations/scheduled-asset-inventory-export-bq/cf/main.py +++ b/cloud-operations/scheduled-asset-inventory-export-bq/cf/main.py @@ -50,18 +50,19 @@ def _configure_logging(verbose=True): @click.option('--bq-project', required=True, help='Bigquery project to use.') @click.option('--bq-dataset', required=True, help='Bigquery dataset to use.') @click.option('--bq-table', required=True, help='Bigquery table name to use.') +@click.option('--bq-table-overwrite', required=True, help='Overwrite existing BQ table or create new datetime() one.') @click.option('--target-node', required=True, help='Node in Google Cloud resource hierarchy.') @click.option('--read-time', required=False, help=( 'Day to take an asset snapshot in \'YYYYMMDD\' format, uses current day ' ' as default. Export will run at midnight of the specified day.')) @click.option('--verbose', is_flag=True, help='Verbose output') -def main_cli(project=None, bq_project=None, bq_dataset=None, bq_table=None, target_node=None, +def main_cli(project=None, bq_project=None, bq_dataset=None, bq_table=None, bq_table_overwrite=None, target_node=None, read_time=None, verbose=False): '''Trigger Cloud Asset inventory export to Bigquery. Data will be stored in the dataset specified on a dated table with the name specified. ''' try: - _main(project, bq_project, bq_dataset, bq_table, target_node, read_time, verbose) + _main(project, bq_project, bq_dataset, bq_table, bq_table_overwrite, target_node, read_time, verbose) except RuntimeError: logging.exception('exception raised') @@ -79,19 +80,22 @@ def main(event, context): logging.exception('exception in cloud function entry point') -def _main(project=None, bq_project=None, bq_dataset=None, bq_table=None, target_node=None, read_time=None, verbose=False): +def _main(project=None, bq_project=None, bq_dataset=None, bq_table=None, bq_table_overwrite=None, target_node=None, read_time=None, verbose=False): 'Module entry point used by cli and cloud function wrappers.' 
   _configure_logging(verbose)
-  if not read_time:
-    read_time = datetime.datetime.now()
-  client = asset_v1.AssetServiceClient()
-  content_type = asset_v1.ContentType.RESOURCE
   output_config = asset_v1.OutputConfig()
+  client = asset_v1.AssetServiceClient()
+  if bq_table_overwrite == False:
+    read_time = datetime.datetime.now()
+    output_config.bigquery_destination.table = '%s_%s' % (
+        bq_table, read_time.strftime('%Y%m%d'))
+  else:
+    output_config.bigquery_destination.table = '%s_latest' % (
+        bq_table)
+  content_type = asset_v1.ContentType.RESOURCE
   output_config.bigquery_destination.dataset = 'projects/%s/datasets/%s' % (
       bq_project, bq_dataset)
-  output_config.bigquery_destination.table = '%s_%s' % (
-      bq_table, read_time.strftime('%Y%m%d'))
   output_config.bigquery_destination.separate_tables_per_asset_type = True
   output_config.bigquery_destination.force = True
   try:
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py
new file mode 100755
index 00000000..cb54b0bc
--- /dev/null
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py
@@ -0,0 +1,99 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''Cloud Function module to export a BQ table as a JSON or CSV file.
+
+This module is designed to be plugged into a Cloud Function attached to a
+Cloud Scheduler trigger, for example to create a JSON file of IP to hostname
+mappings from BigQuery.
+
+'''
+
+import base64
+import datetime
+import json
+import logging
+import os
+import warnings
+
+from google.api_core.exceptions import GoogleAPIError
+from google.cloud import bigquery
+
+import click
+
+import googleapiclient.discovery
+import googleapiclient.errors
+
+
+def _configure_logging(verbose=True):
+  '''Basic logging configuration.
+  Args:
+    verbose: enable verbose logging
+  '''
+  level = logging.DEBUG if verbose else logging.INFO
+  logging.basicConfig(level=level)
+  warnings.filterwarnings('ignore', r'.*end user credentials.*', UserWarning)
+
+@click.command()
+@click.option('--bucket', required=True, help='GCS bucket for the export.')
+@click.option('--filename', required=True, help='Path and filename with extension to export, e.g. folder/export.json.')
+@click.option('--format', required=True, help='The exported file format, e.g. NEWLINE_DELIMITED_JSON or CSV.')
+@click.option('--bq-dataset', required=True, help='Bigquery dataset where the table for export is located.')
+@click.option('--bq-table', required=True, help='Bigquery table to export.')
+@click.option('--verbose', is_flag=True, help='Verbose output')
+def main_cli(bucket=None, filename=None, format=None, bq_dataset=None, bq_table=None, verbose=False):
+  '''Trigger an export of a Bigquery table to a file on GCS. Data will be written
+  to the bucket and object path specified, in the requested format.
+  '''
+  try:
+    _main(bucket, filename, format, bq_dataset, bq_table, verbose)
+  except RuntimeError:
+    logging.exception('exception raised')
+
+def main(event, context):
+  'Cloud Function entry point.'
+  try:
+    data = json.loads(base64.b64decode(event['data']).decode('utf-8'))
+    print(data)
+    _main(**data)
+  # uncomment once https://issuetracker.google.com/issues/155215191 is fixed
+  # except RuntimeError:
+  #   raise
+  except Exception:
+    logging.exception('exception in cloud function entry point')
+
+
+def _main(bucket=None, filename=None, format=None, bq_dataset=None, bq_table=None, verbose=False):
+  'Module entry point used by cli and cloud function wrappers.'
+
+  _configure_logging(verbose)
+  client = bigquery.Client()
+  destination_uri = 'gs://{}/{}'.format(bucket, filename)
+  dataset_ref = client.dataset(bq_dataset)
+  table_ref = dataset_ref.table(bq_table)
+  job_config = bigquery.job.ExtractJobConfig()
+  job_config.destination_format = (
+      getattr(bigquery.DestinationFormat, format))
+  extract_job = client.extract_table(
+      table_ref, destination_uri, job_config=job_config
+  )
+  try:
+    extract_job.result()
+  except (GoogleAPIError, googleapiclient.errors.HttpError) as e:
+    logging.debug('API Error: %s', e, exc_info=True)
+    raise RuntimeError(
+        'Error exporting BQ table %s as a file' % bq_table, e)
+
+
+if __name__ == '__main__':
+  main_cli()
\ No newline at end of file
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt
new file mode 100644
index 00000000..d48ebb54
--- /dev/null
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt
@@ -0,0 +1,3 @@
+google-api-python-client>=1.10.1
+google-cloud-monitoring>=1.1.0
+google-cloud-bigquery
\ No newline at end of file
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/diagram_optional.png b/cloud-operations/scheduled-asset-inventory-export-bq/diagram_optional.png
new file mode 100644
index 00000000..36111c67
Binary files /dev/null and b/cloud-operations/scheduled-asset-inventory-export-bq/diagram_optional.png differ
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/main.tf b/cloud-operations/scheduled-asset-inventory-export-bq/main.tf
index 0052401d..39c1e37d 100644
--- a/cloud-operations/scheduled-asset-inventory-export-bq/main.tf
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/main.tf
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+
+
 ###############################################################################
 # Projects #
 ###############################################################################
@@ -47,6 +49,7 @@ module "service-account" {
   iam_project_roles = {
     (var.project_id) = [
       "roles/cloudasset.owner",
+      "roles/bigquery.jobUser"
     ]
   }
 }
@@ -66,6 +69,17 @@ module "pubsub" {
   subscriptions = {
     "${var.name}-default" = null
   }
   # the Cloud Scheduler robot service account already has pubsub.topics.publish
   # at the project level via roles/cloudscheduler.serviceAgent
 }
+
+module "pubsub_file" {
+  source     = "../../modules/pubsub"
+  project_id = module.project.project_id
+  name       = var.name_cffile
+  subscriptions = {
+    "${var.name_cffile}-default" = null
+  }
+  # the Cloud Scheduler robot service account already has pubsub.topics.publish
+  # at the project level via roles/cloudscheduler.serviceAgent
+}
 
 ###############################################################################
 # Cloud Function #
 ###############################################################################
@@ -93,6 +107,30 @@ module "cf" {
   }
 }
 
+module "cffile" {
+  count       = var.cai_gcs_export ? 1 : 0
+  source      = "../../modules/cloud-function"
+  project_id  = module.project.project_id
+  region      = var.region
+  name        = var.name_cffile
+  bucket_name = "${var.name_cffile}-${random_pet.random.id}"
+  bucket_config = {
+    location             = var.region
+    lifecycle_delete_age = null
+  }
+  bundle_config = {
+    source_dir  = "cffile"
+    output_path = var.bundle_path_cffile
+    excludes    = null
+  }
+  service_account = module.service-account.email
+  trigger_config = {
+    event    = "google.pubsub.topic.publish"
+    resource = module.pubsub_file.topic.id
+    retry    = null
+  }
+}
+
 resource "random_pet" "random" {
   length = 1
 }
@@ -118,11 +156,34 @@ resource "google_cloud_scheduler_job" "job" {
     attributes = {}
     topic_name = module.pubsub.topic.id
     data = base64encode(jsonencode({
-      project     = module.project.project_id
-      bq_project  = module.project.project_id
-      bq_dataset  = var.cai_config.bq_dataset
-      bq_table    = var.cai_config.bq_table
-      target_node = var.cai_config.target_node
+      project            = module.project.project_id
+      bq_project         = module.project.project_id
+      bq_dataset         = var.cai_config.bq_dataset
+      bq_table           = var.cai_config.bq_table
+      bq_table_overwrite = var.cai_config.bq_table_overwrite
+      target_node        = var.cai_config.target_node
     }))
   }
 }
+
+resource "google_cloud_scheduler_job" "job_file" {
+  count       = var.cai_gcs_export ? 1 : 0
+  project     = google_app_engine_application.app.project
+  region      = var.region
+  name        = "file-export-job"
+  description = "File export from BQ Job"
+  schedule    = "* 9 * * 1"
+  time_zone   = "Etc/UTC"
+
+  pubsub_target {
+    attributes = {}
+    topic_name = module.pubsub_file.topic.id
+    data = base64encode(jsonencode({
+      bucket     = var.file_config.bucket
+      filename   = var.file_config.filename
+      format     = var.file_config.format
+      bq_dataset = var.file_config.bq_dataset
+      bq_table   = var.file_config.bq_table
+    }))
+  }
+}
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf b/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf
index 5bb62166..b31291b8 100644
--- a/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf
@@ -26,15 +26,50 @@ variable "bundle_path" {
   default     = "./bundle.zip"
 }
 
+
+variable "bundle_path_cffile" {
+  description = "Path used to write the intermediate Cloud Function code bundle."
+  type        = string
+  default     = "./bundle_cffile.zip"
+}
+
 variable "cai_config" {
-  description = "Cloud Asset inventory export config."
+  description = "Cloud Asset Inventory export config."
   type = object({
-    bq_dataset  = string
-    bq_table    = string
-    target_node = string
+    bq_dataset         = string
+    bq_table           = string
+    bq_table_overwrite = bool
+    target_node        = string
   })
 }
 
+
+variable "cai_gcs_export" {
+  description = "Enable optional part to export tables to GCS."
+  type        = bool
+  default     = false
+}
+
+
+variable "file_config" {
+  description = "Optional BQ table as a file export function config."
+  type = object({
+    bucket     = string
+    filename   = string
+    format     = string
+    bq_dataset = string
+    bq_table   = string
+  })
+  default = {
+    bucket     = null
+    filename   = null
+    format     = null
+    bq_dataset = null
+    bq_table   = null
+  }
+}
+
+
 variable "location" {
   description = "App Engine location used in the example."
   type        = string
@@ -48,6 +83,15 @@ variable "name" {
   default     = "asset-inventory"
 }
 
+
+
+variable "name_cffile" {
+  description = "Arbitrary string used to name created resources."
+  type        = string
+  default     = "cffile-exporter"
+}
+
+
 variable "project_create" {
   description = "Create project instead of using an existing one."
   type        = bool
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/bundle_cffile.zip b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/bundle_cffile.zip
new file mode 100644
index 00000000..454bc1f7
Binary files /dev/null and b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/bundle_cffile.zip differ
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/cffile/README b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/cffile/README
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf
index b892dadb..f7bd01af 100644
--- a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf
+++ b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf
@@ -18,6 +18,8 @@ module "test" {
   source          = "../../../../cloud-operations/scheduled-asset-inventory-export-bq"
   billing_account = var.billing_account
   cai_config      = var.cai_config
+  cai_gcs_export  = var.cai_gcs_export
+  file_config     = var.file_config
   project_create  = var.project_create
   project_id      = var.project_id
 }
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf
index 1d70f827..d80431e3 100644
--- a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf
+++ b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf
@@ -19,17 +19,42 @@ variable "billing_account" {
 
 variable "cai_config" {
   type = object({
-    bq_dataset  = string
-    bq_table    = string
-    target_node = string
+    bq_dataset         = string
+    bq_table           = string
+    bq_table_overwrite = bool
+    target_node        = string
   })
   default = {
-    bq_dataset  = "my-dataset"
-    bq_table    = "my_table"
-    target_node = "organization/1234567890"
+    bq_dataset         = "my-dataset"
+    bq_table           = "my_table"
+    bq_table_overwrite = true
+    target_node        = "organization/1234567890"
   }
 }
 
+variable "cai_gcs_export" {
+  type    = bool
+  default = true
+}
+
+variable "file_config" {
+  type = object({
+    bucket     = string
+    filename   = string
+    format     = string
+    bq_dataset = string
+    bq_table   = string
+  })
+  default = {
+    bucket     = "my-bucket"
+    filename   = "my-folder/myfile.json"
+    format     = "NEWLINE_DELIMITED_JSON"
+    bq_dataset = "my-dataset"
+    bq_table   = "my_table"
+  }
+}
+
+
 variable "project_create" {
   type    = bool
   default = true
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py b/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py
index de94c82d..484496a5 100644
--- a/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py
+++ b/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py
@@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture')
 def test_resources(e2e_plan_runner):
   "Test that plan works and the numbers of resources is as expected."
   modules, resources = e2e_plan_runner(FIXTURES_DIR)
-  assert len(modules) == 5
-  assert len(resources) == 23
+  assert len(modules) == 7
+  assert len(resources) == 29
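
For reference, a minimal `terraform.tfvars` that exercises the new variables could look like the sketch below; every value is a placeholder and the optional file exporter is switched on via `cai_gcs_export`:

```hcl
# Hypothetical terraform.tfvars for this example; all values are placeholders.
billing_account = "1234-ABCD-5678"
root_node       = "folders/1234567890"
project_id      = "my-project"

cai_config = {
  bq_dataset         = "my_dataset"
  bq_table           = "my_table"
  bq_table_overwrite = true # write to a single "<table>_latest" table instead of dated tables
  target_node        = "organizations/1234567890"
}

# optional file exporter
cai_gcs_export = true

file_config = {
  bucket     = "my-bucket"
  filename   = "my-folder/myfile.json"
  format     = "NEWLINE_DELIMITED_JSON" # any supported bigquery DestinationFormat value
  bq_dataset = "my_dataset"
  bq_table   = "my_table"
}
```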
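
The new `cffile` function can also be exercised locally before wiring up Cloud Scheduler. A minimal sketch, assuming application-default credentials, an existing bucket and table, and the placeholder names from the test fixture, feeds the Pub/Sub entry point the same base64-encoded JSON payload that the `job_file` scheduler job publishes:

```python
# Hypothetical local test, run from the cffile/ directory.
import base64
import json

from main import main  # cffile/main.py Cloud Function entry point

# Same fields that google_cloud_scheduler_job.job_file encodes in main.tf.
payload = {
    'bucket': 'my-bucket',  # placeholder values
    'filename': 'my-folder/myfile.json',
    'format': 'NEWLINE_DELIMITED_JSON',
    'bq_dataset': 'my_dataset',
    'bq_table': 'my_table',
}

# Pub/Sub delivers the message as an event dict with base64-encoded 'data'.
event = {'data': base64.b64encode(json.dumps(payload).encode('utf-8'))}
main(event, None)
```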
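
The scheduled query suggested in the README section is meant to be configured from the BigQuery console; as an illustration only of the "dedicated table with overwrites" shape it should produce, an equivalent one-off query job with the Python client could look roughly like this (source table, output table and columns are made up):

```python
# Sketch of a query job that overwrites a dedicated output table; the README
# suggests running the same query as a BigQuery scheduled query instead.
from google.cloud import bigquery

client = bigquery.Client(project='my-project')

# Dedicated output table for downstream consumers (placeholder names).
destination = bigquery.DatasetReference('my-project', 'my_dataset').table('network_dependencies')

job_config = bigquery.QueryJobConfig(
    destination=destination,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,  # overwrite on each run
)

# Shape the output columns to whatever the downstream system expects.
query = """
SELECT name, asset_type, update_time
FROM `my-project.my_dataset.my_table_latest`
"""

client.query(query, job_config=job_config).result()
```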