Add optional BQ table as a file export config
This commit is contained in:
parent
976fabdf86
commit
b36688ec78
|
@ -0,0 +1,96 @@
|
|||
# Copyright 2021 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
'''Cloud Function module to export a BigQuery table as a file.
|
||||
|
||||
This module is designed to be plugged in a Cloud Function, attached to Cloud
|
||||
Scheduler trigger to export a BigQuery table as a file to Cloud Storage.
|
||||
|
||||
'''
|
||||
|
||||
import base64
import datetime
import json
import logging
import os
import warnings

import click

from google.api_core.exceptions import GoogleAPIError
from google.cloud import bigquery

import googleapiclient.discovery
import googleapiclient.errors
|
||||
|
||||
|
||||
def _configure_logging(verbose=True):
|
||||
'''Basic logging configuration.
|
||||
Args:
|
||||
verbose: enable verbose logging
|
||||
'''
|
||||
level = logging.DEBUG if verbose else logging.INFO
|
||||
logging.basicConfig(level=level)
|
||||
warnings.filterwarnings('ignore', r'.*end user credentials.*', UserWarning)
|
||||
|
||||
|
||||
|
||||
@click.command()
@click.option('--bucket', required=True, help='GCS bucket for export')
@click.option('--filename', required=True, help='Path and filename with extension to export e.g. folder/export.json .')
@click.option('--format', required=True, help='The exported file format, e.g. NEWLINE_DELIMITED_JSON or CSV.')
@click.option('--bq-dataset', required=True, help='Bigquery dataset where table for export is located.')
@click.option('--bq-table', required=True, help='Bigquery table to export.')
@click.option('--verbose', is_flag=True, help='Verbose output')
def main_cli(bucket=None, filename=None, format=None, bq_dataset=None, bq_table=None, verbose=False):
  '''Trigger the export of a Bigquery table to a file in a GCS bucket.

  The table is written to gs://BUCKET/FILENAME in the requested format.

  Args:
    bucket: GCS bucket for export
    filename: path and filename with extension inside the bucket
    format: exported file format, e.g. NEWLINE_DELIMITED_JSON or CSV
    bq_dataset: Bigquery dataset where the table is located
    bq_table: Bigquery table to export
    verbose: enable verbose logging
  '''
  # NOTE: the former required --bq-table-overwrite option was removed. It had
  # no matching parameter here, so click failed with a TypeError on every
  # invocation, and nothing in this module implements an overwrite mode.
  try:
    _main(bucket, filename, format, bq_dataset, bq_table, verbose)
  except RuntimeError:
    # _main wraps export failures in RuntimeError, log them with traceback
    logging.exception('exception raised')
|
||||
|
||||
def main(event, context):
  '''Cloud Function entry point.

  Decodes the base64 JSON payload found in event['data'] and passes its keys
  as keyword arguments to _main. Any exception is logged and not re-raised.

  Args:
    event: dict whose 'data' item is the base64-encoded JSON payload
    context: unused
  '''
  try:
    payload = base64.b64decode(event['data'])
    data = json.loads(payload.decode('utf-8'))
    print(data)
    _main(**data)
  # uncomment once https://issuetracker.google.com/issues/155215191 is fixed
  # except RuntimeError:
  #  raise
  except Exception:
    logging.exception('exception in cloud function entry point')
|
||||
|
||||
|
||||
def _main(bucket=None, filename=None, format=None, bq_dataset=None, bq_table=None, verbose=False):
  '''Module entry point used by cli and cloud function wrappers.

  Runs a Bigquery extract job that writes the table to gs://bucket/filename
  and waits for it to complete.

  Args:
    bucket: GCS bucket for export
    filename: path and filename with extension inside the bucket
    format: name of a bigquery.DestinationFormat attribute, e.g.
      NEWLINE_DELIMITED_JSON or CSV
    bq_dataset: Bigquery dataset where the table is located
    bq_table: Bigquery table to export
    verbose: enable verbose logging

  Raises:
    RuntimeError: if the format is not supported or the extract job fails.
  '''
  _configure_logging(verbose)
  # Resolve the format name to the library constant. The previous code set the
  # literal string 'bigquery.DestinationFormat.<format>', which is not a valid
  # destination format.
  try:
    destination_format = getattr(bigquery.DestinationFormat, format)
  except (AttributeError, TypeError) as e:
    raise RuntimeError('Unsupported export format %s' % format) from e
  client = bigquery.Client()
  destination_uri = 'gs://{}/{}'.format(bucket, filename)
  dataset_ref = client.dataset(bq_dataset)
  table_ref = dataset_ref.table(bq_table)
  job_config = bigquery.job.ExtractJobConfig()
  job_config.destination_format = destination_format
  try:
    # starting the job can itself raise API errors, so keep it inside the try
    extract_job = client.extract_table(
        table_ref, destination_uri, job_config=job_config
    )
    extract_job.result()
  except (GoogleAPIError, googleapiclient.errors.HttpError) as e:
    logging.debug('API Error: %s', e, exc_info=True)
    raise RuntimeError(
        'Error exporting BQ table %s as a file' % bq_table, e) from e
|
|
@ -0,0 +1,3 @@
|
|||
google-api-python-client>=1.10.1
|
||||
google-cloud-monitoring>=1.1.0
|
||||
google-cloud-bigquery
|
|
@ -66,6 +66,17 @@ module "pubsub" {
|
|||
# at the project level via roles/cloudscheduler.serviceAgent
|
||||
}
|
||||
|
||||
module "pubsub_file" {
|
||||
source = "../../modules/pubsub"
|
||||
project_id = module.project.project_id
|
||||
name = var.name_cffile
|
||||
subscriptions = {
|
||||
"${var.name_cffile}-default" = null
|
||||
}
|
||||
# the Cloud Scheduler robot service account already has pubsub.topics.publish
|
||||
# at the project level via roles/cloudscheduler.serviceAgent
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
# Cloud Function #
|
||||
###############################################################################
|
||||
|
@ -93,6 +104,29 @@ module "cf" {
|
|||
}
|
||||
}
|
||||
|
||||
module "cffile" {
|
||||
source = "../../modules/cloud-function"
|
||||
project_id = module.project.project_id
|
||||
region = var.region
|
||||
name = var.name_cffile
|
||||
bucket_name = "${var.name_cffile}-${random_pet.random.id}"
|
||||
bucket_config = {
|
||||
location = var.region
|
||||
lifecycle_delete_age = null
|
||||
}
|
||||
bundle_config = {
|
||||
source_dir = "cffile"
|
||||
output_path = var.bundle_path_cffile
|
||||
excludes = null
|
||||
}
|
||||
service_account = module.service-account.email
|
||||
trigger_config = {
|
||||
event = "google.pubsub.topic.publish"
|
||||
resource = module.pubsub_file.topic.id
|
||||
retry = null
|
||||
}
|
||||
}
|
||||
|
||||
resource "random_pet" "random" {
|
||||
length = 1
|
||||
}
|
||||
|
@ -128,6 +162,27 @@ resource "google_cloud_scheduler_job" "job" {
|
|||
}
|
||||
}
|
||||
|
||||
# Cloud Scheduler job that publishes the file export configuration to the
# file export Pub/Sub topic.
resource "google_cloud_scheduler_job" "job_file" {
  project     = google_app_engine_application.app.project
  region      = var.region
  name        = "file-export-job"
  description = "File export from BQ Job"
  # Mondays at 09:00 UTC. The minute field was previously "*", which fires
  # the job every minute between 09:00 and 09:59.
  schedule  = "0 9 * * 1"
  time_zone = "Etc/UTC"

  pubsub_target {
    attributes = {}
    topic_name = module.pubsub_file.topic.id
    # message payload: base64-encoded JSON object built from var.file_config
    data = base64encode(jsonencode({
      bucket     = var.file_config.bucket
      filename   = var.file_config.filename
      format     = var.file_config.format
      bq_dataset = var.file_config.bq_dataset
      bq_table   = var.file_config.bq_table
    }))
  }
}
|
||||
|
||||
###############################################################################
|
||||
# Bigquery #
|
||||
###############################################################################
|
||||
|
|
|
@ -26,8 +26,14 @@ variable "bundle_path" {
|
|||
default = "./bundle.zip"
|
||||
}
|
||||
|
||||
variable "bundle_path_cffile" {
|
||||
description = "Path used to write the intermediate Cloud Function code bundle."
|
||||
type = string
|
||||
default = "./bundle_cffile.zip"
|
||||
}
|
||||
|
||||
variable "cai_config" {
|
||||
description = "Cloud Asset inventory export config."
|
||||
description = "Cloud Asset Inventory export config."
|
||||
type = object({
|
||||
bq_dataset = string
|
||||
bq_table = string
|
||||
|
@ -36,6 +42,17 @@ variable "cai_config" {
|
|||
})
|
||||
}
|
||||
|
||||
variable "file_config" {
|
||||
description = "Optional BQ table as a file export function config."
|
||||
type = object({
|
||||
bucket = string
|
||||
filename = string
|
||||
format = string
|
||||
bq_dataset = string
|
||||
bq_table = string
|
||||
})
|
||||
}
|
||||
|
||||
variable "location" {
|
||||
description = "Appe Engine location used in the example."
|
||||
type = string
|
||||
|
@ -49,6 +66,13 @@ variable "name" {
|
|||
default = "asset-inventory"
|
||||
}
|
||||
|
||||
|
||||
variable "name_cffile" {
|
||||
description = "Arbitrary string used to name created resources."
|
||||
type = string
|
||||
default = "cffile-exporter"
|
||||
}
|
||||
|
||||
variable "project_create" {
|
||||
description = "Create project instead ofusing an existing one."
|
||||
type = bool
|
||||
|
|
Binary file not shown.
|
@ -18,6 +18,7 @@ module "test" {
|
|||
source = "../../../../cloud-operations/scheduled-asset-inventory-export-bq"
|
||||
billing_account = var.billing_account
|
||||
cai_config = var.cai_config
|
||||
file_config = var.file_config
|
||||
project_create = var.project_create
|
||||
project_id = var.project_id
|
||||
}
|
||||
|
|
|
@ -32,6 +32,23 @@ variable "cai_config" {
|
|||
}
|
||||
}
|
||||
|
||||
variable "file_config" {
|
||||
type = object({
|
||||
bucket = string
|
||||
filename = string
|
||||
format = string
|
||||
bq_dataset = string
|
||||
bq_table = string
|
||||
})
|
||||
default = {
|
||||
bucket = "my-bucket"
|
||||
filename = "my-folder/myfile.json"
|
||||
format = "NEWLINE_DELIMITED_JSON"
|
||||
bq_dataset = "my-dataset"
|
||||
bq_table = "my_table"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
variable "project_create" {
|
||||
type = bool
|
||||
|
|
|
@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture')
|
|||
def test_resources(e2e_plan_runner):
  "Test that plan works and the numbers of resources is as expected."
  modules, resources = e2e_plan_runner(FIXTURES_DIR)
  # Only the updated counts are kept: the stale pair (5 modules, 23
  # resources) contradicted them and made the test fail unconditionally.
  assert len(modules) == 7
  assert len(resources) == 28
|
||||
|
|
Loading…
Reference in New Issue