diff --git a/blueprints/data-solutions/data-platform-minimal/01-landing.tf b/blueprints/data-solutions/data-platform-minimal/01-landing.tf
index 48eb9969..10eb5974 100644
--- a/blueprints/data-solutions/data-platform-minimal/01-landing.tf
+++ b/blueprints/data-solutions/data-platform-minimal/01-landing.tf
@@ -16,7 +16,7 @@
locals {
iam_lnd = {
- "roles/storage.objectCreator" = [module.land-sa-cs-0.iam_email]
+ "roles/storage.objectCreator" = [module.land-sa-0.iam_email]
"roles/storage.objectViewer" = [module.processing-sa-cmp-0.iam_email]
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
}
@@ -36,6 +36,9 @@ module "land-project" {
iam = var.project_config.billing_account_id != null ? local.iam_lnd : null
iam_additive = var.project_config.billing_account_id == null ? local.iam_lnd : null
services = [
+ "bigquery.googleapis.com",
+ "bigqueryreservation.googleapis.com",
+ "bigquerystorage.googleapis.com",
"cloudkms.googleapis.com",
"cloudresourcemanager.googleapis.com",
"iam.googleapis.com",
@@ -52,12 +55,12 @@ module "land-project" {
# Cloud Storage
-module "land-sa-cs-0" {
+module "land-sa-0" {
source = "../../../modules/iam-service-account"
project_id = module.land-project.project_id
prefix = var.prefix
- name = "lnd-cs-0"
- display_name = "Data platform GCS landing service account."
+ name = "lnd-sa-0"
+ display_name = "Data platform landing zone service account."
iam = {
"roles/iam.serviceAccountTokenCreator" = [
local.groups_iam.data-engineers
@@ -75,3 +78,11 @@ module "land-cs-0" {
encryption_key = var.service_encryption_keys.storage
force_destroy = var.data_force_destroy
}
+
+module "land-bq-0" {
+ source = "../../../modules/bigquery-dataset"
+ project_id = module.land-project.project_id
+ id = "${replace(var.prefix, "-", "_")}_lnd_bq_0"
+ location = var.location
+ encryption_key = var.service_encryption_keys.bq
+}
diff --git a/blueprints/data-solutions/data-platform-minimal/02-processing.tf b/blueprints/data-solutions/data-platform-minimal/02-processing.tf
index 17835dce..53da3fa6 100644
--- a/blueprints/data-solutions/data-platform-minimal/02-processing.tf
+++ b/blueprints/data-solutions/data-platform-minimal/02-processing.tf
@@ -49,7 +49,7 @@ locals {
}
processing_subnet = (
local.use_shared_vpc
- ? var.network_config.subnet_self_links.processing_transformation
+ ? var.network_config.subnet_self_link
: module.processing-vpc.0.subnet_self_links["${var.region}/${var.prefix}-processing"]
)
processing_vpc = (
@@ -57,8 +57,6 @@ locals {
? var.network_config.network_self_link
: module.processing-vpc.0.self_link
)
-
-
}
module "processing-project" {
diff --git a/blueprints/data-solutions/data-platform-minimal/03-curated.tf b/blueprints/data-solutions/data-platform-minimal/03-curated.tf
index 730e8d6c..4b72f761 100644
--- a/blueprints/data-solutions/data-platform-minimal/03-curated.tf
+++ b/blueprints/data-solutions/data-platform-minimal/03-curated.tf
@@ -18,33 +18,41 @@ locals {
cur_iam = {
"roles/bigquery.dataOwner" = [module.processing-sa-0.iam_email]
"roles/bigquery.dataViewer" = [
+ module.cur-sa-0.iam_email,
local.groups_iam.data-analysts,
local.groups_iam.data-engineers
]
"roles/bigquery.jobUser" = [
- module.processing-sa-0.iam_email,
+ module.processing-sa-0.iam_email, # Remove once bug is fixed. https://github.com/apache/airflow/issues/32106
+ module.cur-sa-0.iam_email,
local.groups_iam.data-analysts,
local.groups_iam.data-engineers
]
"roles/datacatalog.tagTemplateViewer" = [
- local.groups_iam.data-analysts, local.groups_iam.data-engineers
+ module.cur-sa-0.iam_email,
+ local.groups_iam.data-analysts,
+ local.groups_iam.data-engineers
]
"roles/datacatalog.viewer" = [
- local.groups_iam.data-analysts, local.groups_iam.data-engineers
+ module.cur-sa-0.iam_email,
+ local.groups_iam.data-analysts,
+ local.groups_iam.data-engineers
]
"roles/storage.objectViewer" = [
- local.groups_iam.data-analysts, local.groups_iam.data-engineers
+ module.cur-sa-0.iam_email,
+ local.groups_iam.data-analysts,
+ local.groups_iam.data-engineers
]
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
}
cur_services = [
- "iam.googleapis.com",
"bigquery.googleapis.com",
"bigqueryreservation.googleapis.com",
"bigquerystorage.googleapis.com",
"cloudkms.googleapis.com",
"cloudresourcemanager.googleapis.com",
"compute.googleapis.com",
+ "iam.googleapis.com",
"servicenetworking.googleapis.com",
"serviceusage.googleapis.com",
"stackdriver.googleapis.com",
@@ -75,6 +83,19 @@ module "cur-project" {
}
}
+module "cur-sa-0" {
+ source = "../../../modules/iam-service-account"
+ project_id = module.cur-project.project_id
+ prefix = var.prefix
+ name = "cur-sa-0"
+ display_name = "Data platform curated zone service account."
+ iam = {
+ "roles/iam.serviceAccountTokenCreator" = [
+ local.groups_iam.data-engineers
+ ]
+ }
+}
+
# Bigquery
module "cur-bq-0" {
diff --git a/blueprints/data-solutions/data-platform-minimal/IAM.md b/blueprints/data-solutions/data-platform-minimal/IAM.md
index 54bde92d..d5c1ccbb 100644
--- a/blueprints/data-solutions/data-platform-minimal/IAM.md
+++ b/blueprints/data-solutions/data-platform-minimal/IAM.md
@@ -9,7 +9,7 @@ Legend: +
additive, •
conditional.
|gcp-data-analysts
group|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|gcp-data-engineers
group|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin)
[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|gcp-data-security
group|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin)
[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
-|prc-dp-0
serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
+|prc-0
serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
## Project cur
@@ -18,15 +18,16 @@ Legend: +
additive, •
conditional.
|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|gcp-data-engineers
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +
|
-|prc-dp-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|cur-sa-0
serviceAccount|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|prc-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project lnd
| members | roles |
|---|---|
-|lnd-cs-0
serviceAccount|[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
+|lnd-sa-0
serviceAccount|[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
+|prc-0
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|prc-cmp-0
serviceAccount|[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
-|prc-dp-0
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project prc
@@ -35,5 +36,5 @@ Legend: +
additive, •
conditional.
|gcp-data-engineers
group|[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)
[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)
[roles/serviceusage.serviceUsageConsumer](https://cloud.google.com/iam/docs/understanding-roles#serviceusage.serviceUsageConsumer)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|SERVICE_IDENTITY_cloudcomposer-accounts
serviceAccount|[roles/composer.ServiceAgentV2Ext](https://cloud.google.com/iam/docs/understanding-roles#composer.ServiceAgentV2Ext)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +
|
-|prc-cmp-0
serviceAccount|[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker)
[roles/dataproc.editor](https://cloud.google.com/iam/docs/understanding-roles#dataproc.editor)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
-|prc-dp-0
serviceAccount|[roles/dataproc.worker](https://cloud.google.com/iam/docs/understanding-roles#dataproc.worker) |
+|prc-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)
[roles/dataproc.worker](https://cloud.google.com/iam/docs/understanding-roles#dataproc.worker) |
+|prc-cmp-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)
[roles/dataproc.editor](https://cloud.google.com/iam/docs/understanding-roles#dataproc.editor)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
diff --git a/blueprints/data-solutions/data-platform-minimal/README.md b/blueprints/data-solutions/data-platform-minimal/README.md
index cae07a54..e459c37f 100644
--- a/blueprints/data-solutions/data-platform-minimal/README.md
+++ b/blueprints/data-solutions/data-platform-minimal/README.md
@@ -203,7 +203,7 @@ module "data-platform" {
prefix = "myprefix"
}
-# tftest modules=21 resources=116
+# tftest modules=23 resources=123
```
## Customizations
@@ -229,10 +229,7 @@ To configure the use of a shared VPC, configure the `network_config`, example:
network_config = {
host_project = "PROJECT_ID"
network_self_link = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/global/networks/NAME"
- subnet_self_links = {
- processing_transformation = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
- processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
- }
+ subnet_self_link = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
composer_ip_ranges = {
cloudsql = "192.168.XXX.XXX/24"
gke_master = "192.168.XXX.XXX/28"
@@ -280,32 +277,31 @@ The application layer is out of scope of this script. As a demo purpuse only, on
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
-| [organization_domain](variables.tf#L122) | Organization domain. | string
| ✓ | |
-| [prefix](variables.tf#L127) | Prefix used for resource names. | string
| ✓ | |
-| [project_config](variables.tf#L136) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…})
| ✓ | |
+| [organization_domain](variables.tf#L119) | Organization domain. | string
| ✓ | |
+| [prefix](variables.tf#L124) | Prefix used for resource names. | string
| ✓ | |
+| [project_config](variables.tf#L133) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…})
| ✓ | |
| [composer_config](variables.tf#L17) | Cloud Composer config. | object({…})
| | {}
|
| [data_catalog_tags](variables.tf#L54) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string)))
| | {…}
|
| [data_force_destroy](variables.tf#L65) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool
| | false
|
| [enable_services](variables.tf#L71) | Flag to enable or disable services in the Data Platform. | object({…})
| | {}
|
| [groups](variables.tf#L80) | User groups. | map(string)
| | {…}
|
| [location](variables.tf#L90) | Location used for multi-regional resources. | string
| | "eu"
|
-| [network_config](variables.tf#L96) | Shared VPC network configurations to use. If null networks will be created in projects. | object({…})
| | {}
|
-| [project_suffix](variables.tf#L160) | Suffix used only for project ids. | string
| | null
|
-| [region](variables.tf#L166) | Region used for regional resources. | string
| | "europe-west1"
|
-| [service_encryption_keys](variables.tf#L172) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…})
| | {}
|
+| [network_config](variables.tf#L96) | Shared VPC network configurations to use. If null networks will be created in projects. | object({…})
| | {}
|
+| [project_suffix](variables.tf#L157) | Suffix used only for project ids. | string
| | null
|
+| [region](variables.tf#L163) | Region used for regional resources. | string
| | "europe-west1"
|
+| [service_encryption_keys](variables.tf#L169) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…})
| | {}
|
## Outputs
| name | description | sensitive |
|---|---|:---:|
| [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | |
-| [composer](outputs.tf#L24) | Composer variables. | |
-| [dataproc-history-server](outputs.tf#L31) | List of bucket names which have been assigned to the cluster. | |
-| [gcs_buckets](outputs.tf#L36) | GCS buckets. | |
-| [kms_keys](outputs.tf#L46) | Cloud MKS keys. | |
-| [projects](outputs.tf#L51) | GCP Projects informations. | |
-| [service_accounts](outputs.tf#L69) | Service account created. | |
-| [vpc_network](outputs.tf#L78) | VPC network. | |
-| [vpc_subnet](outputs.tf#L86) | VPC subnetworks. | |
+| [composer](outputs.tf#L25) | Composer variables. | |
+| [dataproc-history-server](outputs.tf#L33) | List of bucket names which have been assigned to the cluster. | |
+| [gcs_buckets](outputs.tf#L38) | GCS buckets. | |
+| [kms_keys](outputs.tf#L47) | Cloud MKS keys. | |
+| [network](outputs.tf#L52) | VPC network. | |
+| [projects](outputs.tf#L60) | GCP Projects informations. | |
+| [service_accounts](outputs.tf#L78) | Service account created. | |
diff --git a/blueprints/data-solutions/data-platform-minimal/demo/README.md b/blueprints/data-solutions/data-platform-minimal/demo/README.md
index 910fccf5..b9a24b82 100644
--- a/blueprints/data-solutions/data-platform-minimal/demo/README.md
+++ b/blueprints/data-solutions/data-platform-minimal/demo/README.md
@@ -47,7 +47,7 @@ Below you can find computed commands to perform steps.
```bash
terraform output -json | jq -r '@sh "export LND_SA=\(.service_accounts.value.landing)\nexport PRC_SA=\(.service_accounts.value.processing)\nexport CMP_SA=\(.service_accounts.value.composer)"' > env.sh
-terraform output -json | jq -r '@sh "export LND_GCS=\(.gcs_buckets.value.landing_cs_0)\nexport PRC_GCS=\(.gcs_buckets.value.processing_cs_0)\nexport CMP_GCS=\(.gcs_buckets.value.composer)"' >> env.sh
+terraform output -json | jq -r '@sh "export LND_GCS=\(.gcs_buckets.value.landing)\nexport PRC_GCS=\(.gcs_buckets.value.processing)\nexport CUR_GCS=\(.gcs_buckets.value.curated)\nexport CMP_GCS=\(.composer.value.dag_bucket)"' >> env.sh
source ./env.sh
diff --git a/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py b/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py
new file mode 100644
index 00000000..7abf3691
--- /dev/null
+++ b/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py
@@ -0,0 +1,104 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# --------------------------------------------------------------------------------
+# Load The Dependencies
+# --------------------------------------------------------------------------------
+
+import csv
+import datetime
+import io
+import json
+import logging
+import os
+
+from airflow import models
+from airflow.operators import dummy
+from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator
+
+# --------------------------------------------------------------------------------
+# Set variables - Needed for the DEMO
+# --------------------------------------------------------------------------------
+BQ_LOCATION = os.environ.get("BQ_LOCATION")
+CURATED_PRJ = os.environ.get("CURATED_PRJ")
+CURATED_BQ_DATASET = os.environ.get("CURATED_BQ_DATASET")
+CURATED_GCS = os.environ.get("CURATED_GCS")
+LAND_PRJ = os.environ.get("LAND_PRJ")
+LAND_GCS = os.environ.get("LAND_GCS")
+PROCESSING_GCS = os.environ.get("PROCESSING_GCS")
+PROCESSING_SA = os.environ.get("PROCESSING_SA")
+PROCESSING_PRJ = os.environ.get("PROCESSING_PRJ")
+PROCESSING_SUBNET = os.environ.get("PROCESSING_SUBNET")
+PROCESSING_VPC = os.environ.get("PROCESSING_VPC")
+DP_KMS_KEY = os.environ.get("DP_KMS_KEY", "")
+DP_REGION = os.environ.get("DP_REGION")
+DP_ZONE = os.environ.get("DP_REGION") + "-b"
+
+# --------------------------------------------------------------------------------
+# Set default arguments
+# --------------------------------------------------------------------------------
+
+# If you are running Airflow in more than one time zone
+# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
+# for best practices
+yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
+
+default_args = {
+ 'owner': 'airflow',
+ 'start_date': yesterday,
+ 'depends_on_past': False,
+ 'email': [''],
+ 'email_on_failure': False,
+ 'email_on_retry': False,
+ 'retries': 1,
+ 'retry_delay': datetime.timedelta(minutes=5),
+}
+
+# --------------------------------------------------------------------------------
+# Main DAG
+# --------------------------------------------------------------------------------
+
+with models.DAG(
+ 'bq_gcs2bq',
+ default_args=default_args,
+ schedule_interval=None) as dag:
+ start = dummy.DummyOperator(
+ task_id='start',
+ trigger_rule='all_success'
+ )
+
+ end = dummy.DummyOperator(
+ task_id='end',
+ trigger_rule='all_success'
+ )
+
+    # BigQuery tables automatically created for demo purposes.
+    # Consider a dedicated pipeline or tool for a real-life scenario.
+
+ customers_import = GCSToBigQueryOperator(
+ task_id='csv_to_bigquery',
+ bucket=LAND_GCS[5:],
+ source_objects=['customers.csv'],
+ destination_project_dataset_table='{}:{}.{}'.format(CURATED_PRJ, CURATED_BQ_DATASET, 'customers'),
+ create_disposition='CREATE_IF_NEEDED',
+ write_disposition='WRITE_APPEND',
+ schema_update_options=['ALLOW_FIELD_RELAXATION', 'ALLOW_FIELD_ADDITION'],
+ schema_object="customers.json",
+ schema_object_bucket=PROCESSING_GCS[5:],
+        project_id=PROCESSING_PRJ,  # Jobs keep running in the dataset's project until the Apache Airflow bug is fixed. https://github.com/apache/airflow/issues/32106
+ impersonation_chain=[PROCESSING_SA]
+ )
+
+ start >> customers_import >> end
+
\ No newline at end of file
diff --git a/blueprints/data-solutions/data-platform-minimal/images/diagram.png b/blueprints/data-solutions/data-platform-minimal/images/diagram.png
index 7f992cbc..72eed2df 100644
Binary files a/blueprints/data-solutions/data-platform-minimal/images/diagram.png and b/blueprints/data-solutions/data-platform-minimal/images/diagram.png differ
diff --git a/blueprints/data-solutions/data-platform-minimal/outputs.tf b/blueprints/data-solutions/data-platform-minimal/outputs.tf
index 73bcf0b1..5b975443 100644
--- a/blueprints/data-solutions/data-platform-minimal/outputs.tf
+++ b/blueprints/data-solutions/data-platform-minimal/outputs.tf
@@ -17,7 +17,8 @@
output "bigquery-datasets" {
description = "BigQuery datasets."
value = {
- curated = module.cur-bq-0.dataset_id,
+ curated = module.cur-bq-0.dataset_id
+ landing = module.land-bq-0.dataset_id
}
}
@@ -25,6 +26,7 @@ output "composer" {
description = "Composer variables."
value = {
air_flow_uri = try(google_composer_environment.processing-cmp-0[0].config.0.airflow_uri, null)
+ dag_bucket = try(google_composer_environment.processing-cmp-0[0].config[0].dag_gcs_prefix, null)
}
}
@@ -36,10 +38,9 @@ output "dataproc-history-server" {
output "gcs_buckets" {
description = "GCS buckets."
value = {
- landing_cs_0 = module.land-cs-0.name,
- processing_cs_0 = module.processing-cs-0.name,
- cur_cs_0 = module.cur-cs-0.name,
- composer = try(google_composer_environment.processing-cmp-0[0].config[0].dag_gcs_prefix, null)
+ curated = module.cur-cs-0.name
+ landing = module.land-cs-0.name
+ processing = module.processing-cs-0.name
}
}
@@ -48,20 +49,28 @@ output "kms_keys" {
value = var.service_encryption_keys
}
+output "network" {
+ description = "VPC network."
+ value = {
+ processing_subnet = local.processing_subnet
+ processing_vpc = local.processing_vpc
+ }
+}
+
output "projects" {
description = "GCP Projects informations."
value = {
project_number = {
- landing = module.land-project.number,
- common = module.common-project.number,
- curated = module.cur-project.number,
- processing = module.processing-project.number,
+ common = module.common-project.number
+ curated = module.cur-project.number
+ landing = module.land-project.number
+ processing = module.processing-project.number
}
project_id = {
- landing = module.land-project.project_id,
- common = module.common-project.project_id,
- curated = module.cur-project.project_id,
- processing = module.processing-project.project_id,
+ common = module.common-project.project_id
+ curated = module.cur-project.project_id
+ landing = module.land-project.project_id
+ processing = module.processing-project.project_id
}
}
}
@@ -69,24 +78,9 @@ output "projects" {
output "service_accounts" {
description = "Service account created."
value = {
- landing = module.land-sa-cs-0.email
- processing = module.processing-sa-0.email
composer = module.processing-sa-cmp-0.email
- }
-}
-
-output "vpc_network" {
- description = "VPC network."
- value = {
- processing_transformation = local.processing_vpc
- processing_composer = local.processing_vpc
- }
-}
-
-output "vpc_subnet" {
- description = "VPC subnetworks."
- value = {
- processing_transformation = local.processing_subnet
- processing_composer = local.processing_subnet
+ curated = module.cur-sa-0.email,
+ landing = module.land-sa-0.email,
+ processing = module.processing-sa-0.email,
}
}
diff --git a/blueprints/data-solutions/data-platform-minimal/variables.tf b/blueprints/data-solutions/data-platform-minimal/variables.tf
index e6b62df6..09cdfdb8 100644
--- a/blueprints/data-solutions/data-platform-minimal/variables.tf
+++ b/blueprints/data-solutions/data-platform-minimal/variables.tf
@@ -98,10 +98,7 @@ variable "network_config" {
type = object({
host_project = optional(string)
network_self_link = optional(string)
- subnet_self_links = optional(object({
- processing_transformation = string
- processing_composer = string
- }), null)
+ subnet_self_link = optional(string)
composer_ip_ranges = optional(object({
connection_subnetwork = optional(string)
cloud_sql = optional(string, "10.20.10.0/24")