Add optional BQ table as a file export config

Arseny Chernov 2021-12-16 17:54:38 +08:00
parent 976fabdf86
commit b36688ec78
9 changed files with 199 additions and 3 deletions

View File

@@ -0,0 +1,96 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''Cloud Function module to export a BigQuery table as a file.

This module is designed to be plugged into a Cloud Function, attached to a
Cloud Scheduler trigger, to export a BigQuery table (e.g. a table of IP to
hostname mappings) as a file to GCS.
'''
import base64
import json
import logging
import warnings

import click
from google.api_core.exceptions import GoogleAPIError
from google.cloud import bigquery
import googleapiclient.errors

def _configure_logging(verbose=True):
  '''Basic logging configuration.

  Args:
    verbose: enable verbose logging
  '''
  level = logging.DEBUG if verbose else logging.INFO
  logging.basicConfig(level=level)
  warnings.filterwarnings('ignore', r'.*end user credentials.*', UserWarning)

@click.command()
@click.option('--bucket', required=True, help='GCS bucket for the export.')
@click.option('--filename', required=True,
              help='Path and filename for the exported file, e.g. folder/export.json.')
@click.option('--format', required=True,
              help='Export file format, e.g. NEWLINE_DELIMITED_JSON or CSV.')
@click.option('--bq-dataset', required=True,
              help='BigQuery dataset containing the table to export.')
@click.option('--bq-table', required=True, help='BigQuery table to export.')
@click.option('--verbose', is_flag=True, help='Verbose output.')
def main_cli(bucket=None, filename=None, format=None, bq_dataset=None,
             bq_table=None, verbose=False):
  '''Trigger an export of a BigQuery table to a file in GCS. The table is
  exported to the bucket and filename specified, in the given format.
  '''
  try:
    _main(bucket, filename, format, bq_dataset, bq_table, verbose)
  except RuntimeError:
    logging.exception('exception raised')

def main(event, context):
  'Cloud Function entry point.'
  try:
    data = json.loads(base64.b64decode(event['data']).decode('utf-8'))
    print(data)
    _main(**data)
  # uncomment once https://issuetracker.google.com/issues/155215191 is fixed
  # except RuntimeError:
  #   raise
  except Exception:
    logging.exception('exception in cloud function entry point')

def _main(bucket=None, filename=None, format=None, bq_dataset=None,
          bq_table=None, verbose=False):
  'Module entry point used by cli and cloud function wrappers.'
  _configure_logging(verbose)
  client = bigquery.Client()
  destination_uri = 'gs://{}/{}'.format(bucket, filename)
  dataset_ref = client.dataset(bq_dataset)
  table_ref = dataset_ref.table(bq_table)
  job_config = bigquery.job.ExtractJobConfig()
  # the API expects the plain enum value, e.g. 'NEWLINE_DELIMITED_JSON'
  job_config.destination_format = format
  extract_job = client.extract_table(table_ref, destination_uri,
                                     job_config=job_config)
  try:
    extract_job.result()
  except (GoogleAPIError, googleapiclient.errors.HttpError) as e:
    logging.debug('API Error: %s', e, exc_info=True)
    raise RuntimeError(
        'Error exporting BQ table %s as a file' % bq_table, e)
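
As a usage sketch (not part of this commit), the function can be exercised locally by handing main() an event shaped like the message the Cloud Scheduler job publishes. The module name main and the field values below are assumptions; the values mirror the test fixture defaults.

# Hypothetical local smoke test for the Cloud Function entry point.
# Assumes the file above is saved as main.py and Application Default
# Credentials with BigQuery and GCS access are configured.
import base64
import json

import main  # assumed module name for the file above

payload = {
    'bucket': 'my-bucket',                # example values, mirroring the
    'filename': 'my-folder/myfile.json',  # test fixture defaults
    'format': 'NEWLINE_DELIMITED_JSON',
    'bq_dataset': 'my-dataset',
    'bq_table': 'my_table',
}
# Pub/Sub delivers message data base64-encoded, so encode it the same way.
event = {'data': base64.b64encode(json.dumps(payload).encode('utf-8'))}
main.main(event, None)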

View File

@@ -0,0 +1,3 @@
click
google-api-python-client>=1.10.1
google-cloud-bigquery
google-cloud-monitoring>=1.1.0

View File

@@ -66,6 +66,17 @@ module "pubsub" {
  # at the project level via roles/cloudscheduler.serviceAgent
}
module "pubsub_file" {
  source     = "../../modules/pubsub"
  project_id = module.project.project_id
  name       = var.name_cffile
  subscriptions = {
    "${var.name_cffile}-default" = null
  }
  # the Cloud Scheduler robot service account already has pubsub.topics.publish
  # at the project level via roles/cloudscheduler.serviceAgent
}
###############################################################################
# Cloud Function #
###############################################################################
@@ -93,6 +104,29 @@ module "cf" {
  }
}
module "cffile" {
  source      = "../../modules/cloud-function"
  project_id  = module.project.project_id
  region      = var.region
  name        = var.name_cffile
  bucket_name = "${var.name_cffile}-${random_pet.random.id}"
  bucket_config = {
    location             = var.region
    lifecycle_delete_age = null
  }
  bundle_config = {
    source_dir  = "cffile"
    output_path = var.bundle_path_cffile
    excludes    = null
  }
  service_account = module.service-account.email
  trigger_config = {
    event    = "google.pubsub.topic.publish"
    resource = module.pubsub_file.topic.id
    retry    = null
  }
}
resource "random_pet" "random" {
  length = 1
}
@@ -128,6 +162,27 @@ resource "google_cloud_scheduler_job" "job" {
  }
}
resource "google_cloud_scheduler_job" "job_file" {
  project     = google_app_engine_application.app.project
  region      = var.region
  name        = "file-export-job"
  description = "File export from BQ Job"
  schedule    = "0 9 * * 1"
  time_zone   = "Etc/UTC"
  pubsub_target {
    attributes = {}
    topic_name = module.pubsub_file.topic.id
    data = base64encode(jsonencode({
      bucket     = var.file_config.bucket
      filename   = var.file_config.filename
      format     = var.file_config.format
      bq_dataset = var.file_config.bq_dataset
      bq_table   = var.file_config.bq_table
    }))
  }
}
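
For an ad-hoc export outside the weekly schedule, the same message can be published by hand. A minimal sketch, not part of this commit, assuming the google-cloud-pubsub client library is installed; 'my-project' is a placeholder, and 'cffile-exporter' is the default value of var.name_cffile.

# Hypothetical manual trigger: publish the export config to the function's topic.
import json

from google.cloud import pubsub_v1

publisher = pubsub_v1.PublisherClient()
# 'my-project' is a placeholder project ID; the topic name assumes the
# var.name_cffile default.
topic = publisher.topic_path('my-project', 'cffile-exporter')
message = json.dumps({
    'bucket': 'my-bucket',
    'filename': 'my-folder/myfile.json',
    'format': 'NEWLINE_DELIMITED_JSON',
    'bq_dataset': 'my-dataset',
    'bq_table': 'my_table',
}).encode('utf-8')
# Pub/Sub base64-encodes the payload in transit, matching what main() expects.
publisher.publish(topic, message).result()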
###############################################################################
# Bigquery #
###############################################################################

View File

@@ -26,8 +26,14 @@ variable "bundle_path" {
  default     = "./bundle.zip"
}
variable "bundle_path_cffile" {
  description = "Path used to write the intermediate Cloud Function code bundle."
  type        = string
  default     = "./bundle_cffile.zip"
}
variable "cai_config" {
description = "Cloud Asset inventory export config."
description = "Cloud Asset Inventory export config."
type = object({
bq_dataset = string
bq_table = string
@@ -36,6 +42,17 @@ variable "cai_config" {
  })
}
variable "file_config" {
  description = "Optional config for the Cloud Function exporting a BQ table as a file."
  type = object({
    bucket     = string
    filename   = string
    format     = string
    bq_dataset = string
    bq_table   = string
  })
}
variable "location" {
description = "Appe Engine location used in the example."
type = string
@@ -49,6 +66,13 @@ variable "name" {
  default     = "asset-inventory"
}
variable "name_cffile" {
  description = "Arbitrary string used to name created resources."
  type        = string
  default     = "cffile-exporter"
}
variable "project_create" {
description = "Create project instead ofusing an existing one."
type = bool

View File

@@ -18,6 +18,7 @@ module "test" {
  source          = "../../../../cloud-operations/scheduled-asset-inventory-export-bq"
  billing_account = var.billing_account
  cai_config      = var.cai_config
  file_config     = var.file_config
  project_create  = var.project_create
  project_id      = var.project_id
}

View File

@@ -32,6 +32,23 @@ variable "cai_config" {
  }
}
variable "file_config" {
  type = object({
    bucket     = string
    filename   = string
    format     = string
    bq_dataset = string
    bq_table   = string
  })
  default = {
    bucket     = "my-bucket"
    filename   = "my-folder/myfile.json"
    format     = "NEWLINE_DELIMITED_JSON"
    bq_dataset = "my-dataset"
    bq_table   = "my_table"
  }
}
variable "project_create" {
  type = bool

View File

@@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture')
def test_resources(e2e_plan_runner):
  "Test that plan works and the number of resources is as expected."
  modules, resources = e2e_plan_runner(FIXTURES_DIR)
  assert len(modules) == 7
  assert len(resources) == 28