Merge branch 'master' into gcs2bq-least-privileges

Ludovico Magnocavallo 2021-12-24 11:36:04 +01:00 committed by GitHub
commit eaae34b623
12 changed files with 285 additions and 28 deletions


@ -36,7 +36,21 @@ Once done testing, you can clean up resources by running `terraform destroy`. To
Once resources are created, you can run queries on the data you exported to BigQuery. [Here](https://cloud.google.com/asset-inventory/docs/exporting-to-bigquery#querying_an_asset_snapshot) you can find some examples of queries you can run.
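As a minimal sketch of querying a snapshot programmatically (the project, dataset and table names below are placeholders, and the actual table names depend on your `cai_config` and the export options), you can use the BigQuery client library:

```python
# Sketch only: project, dataset and table names are assumptions; adjust them to
# your cai_config values and the per-asset-type table naming of the export.
from google.cloud import bigquery

client = bigquery.Client(project='my-project')
query = '''
  SELECT name, asset_type
  FROM `my-project.my-dataset.my_table_20211224_compute_googleapis_com_Instance`
  LIMIT 10
'''
for row in client.query(query):  # the query job is iterable once finished
    print(row.name, row.asset_type)
```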
You can also create a dashboard connecting [Datalab](https://datastudio.google.com/) or any other BI tools of your choice to your Bigquery datase.
You can also create a dashboard by connecting [Datalab](https://datastudio.google.com/) or any other BI tool of your choice to your BigQuery dataset.
## File exporter for JSON, CSV (optional).
This is an optional part.
Regular file-based exports of data from Cloud Asset Inventory may be useful for e.g. scale-out network dependency discovery tools like [Planet Exporter](https://github.com/williamchanrico/planet-exporter), or for updating legacy workload tracking or configuration management systems. BigQuery supports multiple [export formats](https://cloud.google.com/bigquery/docs/exporting-data#export_formats_and_compression_types), and the provided Cloud Function can upload the resulting objects to a Storage bucket. Specify `job.DestinationFormat` as defined in the [documentation](https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.job.DestinationFormat.html), e.g. `NEWLINE_DELIMITED_JSON`.
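For an ad-hoc export outside the Cloud Scheduler job, you can publish a message with the same payload the scheduler sends to the function's Pub/Sub topic. A minimal sketch, assuming the default `cffile-exporter` topic name and placeholder values for the `file_config` fields:

```python
# Sketch only: project and topic names are assumptions; the payload mirrors the
# file_config fields wired into the Cloud Scheduler job in main.tf.
import json
from google.cloud import pubsub_v1

publisher = pubsub_v1.PublisherClient()
topic = publisher.topic_path('my-project', 'cffile-exporter')
payload = {
    'bucket': 'my-bucket',
    'filename': 'my-folder/myfile.json',
    'format': 'NEWLINE_DELIMITED_JSON',
    'bq_dataset': 'my-dataset',
    'bq_table': 'my_table',
}
publisher.publish(topic, json.dumps(payload).encode('utf-8')).result()
```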
It helps to create a custom [scheduled query](https://cloud.google.com/bigquery/docs/scheduling-queries#console) from the CAI export tables and to write the results into a dedicated table (with overwrites). Define the query's output columns to comply with the downstream systems' field requirements, and schedule its execution after the CAI export into BigQuery so the data stays fresh. See [sample queries](https://cloud.google.com/asset-inventory/docs/exporting-to-bigquery-sample-queries).
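While prototyping such a query, the same overwrite behaviour can be reproduced with the BigQuery client library. A minimal sketch, with assumed dataset, table and column names, that writes query results into a dedicated table using `WRITE_TRUNCATE`:

```python
# Sketch only: dataset, table and column names are assumptions; a scheduled
# query configured in the console achieves the same result on a schedule.
from google.cloud import bigquery

client = bigquery.Client()
job_config = bigquery.QueryJobConfig(
    destination='my-project.my-dataset.my_export_table',
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
query = '''
  SELECT name, asset_type, update_time
  FROM `my-project.my-dataset.my_table_latest_compute_googleapis_com_Instance`
'''
client.query(query, job_config=job_config).result()
```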
This part is created only if `cai_gcs_export` is set to `true`. The high-level diagram extends to the following:
<img src="diagram_optional.png" width="640px">
<!-- BEGIN TFDOC -->
@ -44,12 +58,16 @@ You can also create a dashboard connecting [Datalab](https://datastudio.google.c
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| cai_config | Cloud Asset inventory export config. | <code title="object&#40;&#123;&#10; bq_dataset &#61; string&#10; bq_table &#61; string&#10; target_node &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | |
| cai_config | Cloud Asset Inventory export config. | <code title="object&#40;&#123;&#10; bq_dataset &#61; string&#10; bq_table &#61; string&#10; bq_table_overwrite &#61; bool&#10; target_node &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | |
| project_id | Project id that references existing project. | <code>string</code> | ✓ | |
| billing_account | Billing account id used as default for new projects. | <code>string</code> | | <code>null</code> |
| bundle_path | Path used to write the intermediate Cloud Function code bundle. | <code>string</code> | | <code>&#34;.&#47;bundle.zip&#34;</code> |
| bundle_path_cffile | Path used to write the intermediate Cloud Function code bundle. | <code>string</code> | | <code>&#34;.&#47;bundle_cffile.zip&#34;</code> |
| cai_gcs_export | Enable optional part to export tables to GCS. | <code>bool</code> | | <code>false</code> |
| file_config | Optional BQ table as a file export function config. | <code title="object&#40;&#123;&#10; bucket &#61; string&#10; filename &#61; string&#10; format &#61; string&#10; bq_dataset &#61; string&#10; bq_table &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; bucket &#61; null&#10; filename &#61; null&#10; format &#61; null&#10; bq_dataset &#61; null&#10; bq_table &#61; null&#10;&#125;">&#123;&#8230;&#125;</code> |
| location | App Engine location used in the example. | <code>string</code> | | <code>&#34;europe-west&#34;</code> |
| name | Arbitrary string used to name created resources. | <code>string</code> | | <code>&#34;asset-inventory&#34;</code> |
| name_cffile | Arbitrary string used to name created resources. | <code>string</code> | | <code>&#34;cffile-exporter&#34;</code> |
| project_create | Create project instead of using an existing one. | <code>bool</code> | | <code>true</code> |
| region | Compute region used in the example. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
| root_node | The resource name of the parent folder or organization for project creation, in 'folders/folder_id' or 'organizations/org_id' format. | <code>string</code> | | <code>null</code> |
@ -63,3 +81,4 @@ You can also create a dashboard connecting [Datalab](https://datastudio.google.c
<!-- END TFDOC -->


@ -50,18 +50,19 @@ def _configure_logging(verbose=True):
@click.option('--bq-project', required=True, help='Bigquery project to use.')
@click.option('--bq-dataset', required=True, help='Bigquery dataset to use.')
@click.option('--bq-table', required=True, help='Bigquery table name to use.')
@click.option('--bq-table-overwrite', required=True, help='Overwrite the existing BQ table or create a new date-suffixed one.')
@click.option('--target-node', required=True, help='Node in Google Cloud resource hierarchy.')
@click.option('--read-time', required=False, help=(
    'Day to take an asset snapshot in \'YYYYMMDD\' format, uses current day '
    ' as default. Export will run at midnight of the specified day.'))
@click.option('--verbose', is_flag=True, help='Verbose output')
def main_cli(project=None, bq_project=None, bq_dataset=None, bq_table=None, target_node=None,
def main_cli(project=None, bq_project=None, bq_dataset=None, bq_table=None, bq_table_overwrite=None, target_node=None,
             read_time=None, verbose=False):
  '''Trigger Cloud Asset inventory export to Bigquery. Data will be stored in
  the dataset specified on a dated table with the name specified.
  '''
  try:
    _main(project, bq_project, bq_dataset, bq_table, target_node, read_time, verbose)
    _main(project, bq_project, bq_dataset, bq_table, bq_table_overwrite, target_node, read_time, verbose)
  except RuntimeError:
    logging.exception('exception raised')
@ -79,19 +80,22 @@ def main(event, context):
    logging.exception('exception in cloud function entry point')
def _main(project=None, bq_project=None, bq_dataset=None, bq_table=None, target_node=None, read_time=None, verbose=False):
def _main(project=None, bq_project=None, bq_dataset=None, bq_table=None, bq_table_overwrite=None, target_node=None, read_time=None, verbose=False):
  'Module entry point used by cli and cloud function wrappers.'
  _configure_logging(verbose)
  if not read_time:
    read_time = datetime.datetime.now()
  client = asset_v1.AssetServiceClient()
  content_type = asset_v1.ContentType.RESOURCE
  output_config = asset_v1.OutputConfig()
  client = asset_v1.AssetServiceClient()
  if bq_table_overwrite == False:
    read_time = datetime.datetime.now()
    output_config.bigquery_destination.table = '%s_%s' % (
        bq_table, read_time.strftime('%Y%m%d'))
  else:
    output_config.bigquery_destination.table = '%s_latest' % (
        bq_table)
  content_type = asset_v1.ContentType.RESOURCE
  output_config.bigquery_destination.dataset = 'projects/%s/datasets/%s' % (
      bq_project, bq_dataset)
  output_config.bigquery_destination.table = '%s_%s' % (
      bq_table, read_time.strftime('%Y%m%d'))
  output_config.bigquery_destination.separate_tables_per_asset_type = True
  output_config.bigquery_destination.force = True
  try:


@ -0,0 +1,99 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''Cloud Function module to export BQ table as JSON.
This module is designed to be plugged in a Cloud Function, attached to Cloud
Scheduler trigger to create a JSON of IP to hostname mappings from BigQuery.
'''
import base64
import datetime
import json
import logging
import os
import warnings
from google.api_core.exceptions import GoogleAPIError
from google.cloud import bigquery
import click
import googleapiclient.discovery
import googleapiclient.errors
def _configure_logging(verbose=True):
  '''Basic logging configuration.
  Args:
    verbose: enable verbose logging
  '''
  level = logging.DEBUG if verbose else logging.INFO
  logging.basicConfig(level=level)
  warnings.filterwarnings('ignore', r'.*end user credentials.*', UserWarning)
@click.command()
@click.option('--bucket', required=True, help='GCS bucket for export')
@click.option('--filename', required=True, help='Path and filename with extension to export e.g. folder/export.json .')
@click.option('--format', required=True, help='The exported file format, e.g. NEWLINE_DELIMITED_JSON or CSV.')
@click.option('--bq-dataset', required=True, help='Bigquery dataset where table for export is located.')
@click.option('--bq-table', required=True, help='Bigquery table to export.')
@click.option('--verbose', is_flag=True, help='Verbose output')
def main_cli(bucket=None, filename=None, format=None, bq_dataset=None, bq_table=None, verbose=False):
  '''Trigger export of a Bigquery table to a file on GCS. Data will be read from
  the specified dataset and table and written to the specified bucket and
  filename in the specified format.
  '''
  try:
    _main(bucket, filename, format, bq_dataset, bq_table, verbose)
  except RuntimeError:
    logging.exception('exception raised')
def main(event, context):
  'Cloud Function entry point.'
  try:
    data = json.loads(base64.b64decode(event['data']).decode('utf-8'))
    print(data)
    _main(**data)
  # uncomment once https://issuetracker.google.com/issues/155215191 is fixed
  # except RuntimeError:
  #   raise
  except Exception:
    logging.exception('exception in cloud function entry point')
def _main(bucket=None, filename=None, format=None, bq_dataset=None, bq_table=None, verbose=False):
  'Module entry point used by cli and cloud function wrappers.'
  _configure_logging(verbose)
  client = bigquery.Client()
  destination_uri = 'gs://{}/{}'.format(bucket, filename)
  dataset_ref = client.dataset(bq_dataset)
  table_ref = dataset_ref.table(bq_table)
  job_config = bigquery.job.ExtractJobConfig()
  job_config.destination_format = (
      getattr(bigquery.DestinationFormat, format))
  extract_job = client.extract_table(
      table_ref, destination_uri, job_config=job_config
  )
  try:
    extract_job.result()
  except (GoogleAPIError, googleapiclient.errors.HttpError) as e:
    logging.debug('API Error: %s', e, exc_info=True)
    raise RuntimeError(
        'Error exporting BQ table %s as a file' % bq_table, e)
if __name__ == '__main__':
  main_cli()


@ -0,0 +1,3 @@
google-api-python-client>=1.10.1
google-cloud-monitoring>=1.1.0
google-cloud-bigquery

Binary file not shown.



@ -14,6 +14,8 @@
* limitations under the License.
*/
###############################################################################
# Projects #
###############################################################################
@ -47,6 +49,7 @@ module "service-account" {
  iam_project_roles = {
    (var.project_id) = [
      "roles/cloudasset.owner",
      "roles/bigquery.jobUser"
    ]
  }
}
@ -66,6 +69,17 @@ module "pubsub" {
  # at the project level via roles/cloudscheduler.serviceAgent
}
module "pubsub_file" {
  source     = "../../modules/pubsub"
  project_id = module.project.project_id
  name       = var.name_cffile
  subscriptions = {
    "${var.name_cffile}-default" = null
  }
  # the Cloud Scheduler robot service account already has pubsub.topics.publish
  # at the project level via roles/cloudscheduler.serviceAgent
}
###############################################################################
# Cloud Function #
###############################################################################
@ -93,6 +107,30 @@ module "cf" {
  }
}
module "cffile" {
  count       = var.cai_gcs_export ? 1 : 0
  source      = "../../modules/cloud-function"
  project_id  = module.project.project_id
  region      = var.region
  name        = var.name_cffile
  bucket_name = "${var.name_cffile}-${random_pet.random.id}"
  bucket_config = {
    location             = var.region
    lifecycle_delete_age = null
  }
  bundle_config = {
    source_dir  = "cffile"
    output_path = var.bundle_path_cffile
    excludes    = null
  }
  service_account = module.service-account.email
  trigger_config = {
    event    = "google.pubsub.topic.publish"
    resource = module.pubsub_file.topic.id
    retry    = null
  }
}
resource "random_pet" "random" {
  length = 1
}
@ -118,11 +156,34 @@ resource "google_cloud_scheduler_job" "job" {
    attributes = {}
    topic_name = module.pubsub.topic.id
    data = base64encode(jsonencode({
      project     = module.project.project_id
      bq_project  = module.project.project_id
      bq_dataset  = var.cai_config.bq_dataset
      bq_table    = var.cai_config.bq_table
      target_node = var.cai_config.target_node
      project            = module.project.project_id
      bq_project         = module.project.project_id
      bq_dataset         = var.cai_config.bq_dataset
      bq_table           = var.cai_config.bq_table
      bq_table_overwrite = var.cai_config.bq_table_overwrite
      target_node        = var.cai_config.target_node
    }))
  }
}
resource "google_cloud_scheduler_job" "job_file" {
  count       = var.cai_gcs_export ? 1 : 0
  project     = google_app_engine_application.app.project
  region      = var.region
  name        = "file-export-job"
  description = "File export from BQ Job"
  schedule    = "* 9 * * 1"
  time_zone   = "Etc/UTC"
  pubsub_target {
    attributes = {}
    topic_name = module.pubsub_file.topic.id
    data = base64encode(jsonencode({
      bucket     = var.file_config.bucket
      filename   = var.file_config.filename
      format     = var.file_config.format
      bq_dataset = var.file_config.bq_dataset
      bq_table   = var.file_config.bq_table
    }))
  }
}


@ -26,15 +26,50 @@ variable "bundle_path" {
  default = "./bundle.zip"
}
variable "bundle_path_cffile" {
  description = "Path used to write the intermediate Cloud Function code bundle."
  type        = string
  default     = "./bundle_cffile.zip"
}
variable "cai_config" {
  description = "Cloud Asset inventory export config."
  description = "Cloud Asset Inventory export config."
  type = object({
    bq_dataset  = string
    bq_table    = string
    target_node = string
    bq_dataset         = string
    bq_table           = string
    bq_table_overwrite = bool
    target_node        = string
  })
}
variable "cai_gcs_export" {
  description = "Enable optional part to export tables to GCS."
  type        = bool
  default     = false
}
variable "file_config" {
  description = "Optional BQ table as a file export function config."
  type = object({
    bucket     = string
    filename   = string
    format     = string
    bq_dataset = string
    bq_table   = string
  })
  default = {
    bucket     = null
    filename   = null
    format     = null
    bq_dataset = null
    bq_table   = null
  }
}
variable "location" {
  description = "App Engine location used in the example."
  type        = string
@ -48,6 +83,15 @@ variable "name" {
  default = "asset-inventory"
}
variable "name_cffile" {
  description = "Arbitrary string used to name created resources."
  type        = string
  default     = "cffile-exporter"
}
variable "project_create" {
  description = "Create project instead of using an existing one."
  type        = bool


@ -18,6 +18,8 @@ module "test" {
  source          = "../../../../cloud-operations/scheduled-asset-inventory-export-bq"
  billing_account = var.billing_account
  cai_config      = var.cai_config
  cai_gcs_export  = var.cai_gcs_export
  file_config     = var.file_config
  project_create  = var.project_create
  project_id      = var.project_id
}


@ -19,17 +19,42 @@ variable "billing_account" {
variable "cai_config" {
  type = object({
    bq_dataset  = string
    bq_table    = string
    target_node = string
    bq_dataset         = string
    bq_table           = string
    bq_table_overwrite = bool
    target_node        = string
  })
  default = {
    bq_dataset  = "my-dataset"
    bq_table    = "my_table"
    target_node = "organization/1234567890"
    bq_dataset         = "my-dataset"
    bq_table           = "my_table"
    bq_table_overwrite = true
    target_node        = "organization/1234567890"
  }
}
variable "cai_gcs_export" {
  type    = bool
  default = true
}
variable "file_config" {
  type = object({
    bucket     = string
    filename   = string
    format     = string
    bq_dataset = string
    bq_table   = string
  })
  default = {
    bucket     = "my-bucket"
    filename   = "my-folder/myfile.json"
    format     = "NEWLINE_DELIMITED_JSON"
    bq_dataset = "my-dataset"
    bq_table   = "my_table"
  }
}
variable "project_create" {
  type    = bool
  default = true


@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture')
def test_resources(e2e_plan_runner):
  "Test that plan works and the number of resources is as expected."
  modules, resources = e2e_plan_runner(FIXTURES_DIR)
  assert len(modules) == 5
  assert len(resources) == 23
  assert len(modules) == 7
  assert len(resources) == 29