Update naming convention

Lorenzo Caggioni 2022-04-21 23:53:16 +02:00
parent acda6ab5ab
commit e487f8d731
15 changed files with 12470 additions and 323 deletions

View File

@ -12,20 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# tfdoc:file:description land project and resources.
# tfdoc:file:description drop off project and resources.
locals {
land_orch_service_accounts = [
drop_orch_service_accounts = [
module.load-sa-df-0.iam_email, module.orch-sa-cmp-0.iam_email
]
}
module "land-project" {
module "drop-project" {
source = "../../../modules/project"
parent = var.folder_id
billing_account = var.billing_account_id
prefix = var.prefix
name = "lnd${local.project_suffix}"
name = "drp${local.project_suffix}"
group_iam = {
(local.groups.data-engineers) = [
"roles/bigquery.dataEditor",
@ -34,14 +34,14 @@ module "land-project" {
]
}
iam = {
"roles/bigquery.dataEditor" = [module.land-sa-bq-0.iam_email]
"roles/bigquery.dataEditor" = [module.drop-sa-bq-0.iam_email]
"roles/bigquery.user" = [module.load-sa-df-0.iam_email]
"roles/pubsub.publisher" = [module.land-sa-ps-0.iam_email]
"roles/pubsub.publisher" = [module.drop-sa-ps-0.iam_email]
"roles/pubsub.subscriber" = concat(
local.land_orch_service_accounts, [module.load-sa-df-0.iam_email]
local.drop_orch_service_accounts, [module.load-sa-df-0.iam_email]
)
"roles/storage.objectAdmin" = [module.load-sa-df-0.iam_email]
"roles/storage.objectCreator" = [module.land-sa-cs-0.iam_email]
"roles/storage.objectCreator" = [module.drop-sa-cs-0.iam_email]
"roles/storage.objectViewer" = [module.orch-sa-cmp-0.iam_email]
"roles/storage.admin" = [module.load-sa-df-0.iam_email]
}
@ -63,12 +63,12 @@ module "land-project" {
# Cloud Storage
module "land-sa-cs-0" {
module "drop-sa-cs-0" {
source = "../../../modules/iam-service-account"
project_id = module.land-project.project_id
project_id = module.drop-project.project_id
prefix = var.prefix
name = "lnd-cs-0"
display_name = "Data platform GCS landing service account."
name = "drp-cs-0"
display_name = "Data platform GCS drop off service account."
iam = {
"roles/iam.serviceAccountTokenCreator" = [
local.groups_iam.data-engineers
@ -76,11 +76,11 @@ module "land-sa-cs-0" {
}
}
module "land-cs-0" {
module "drop-cs-0" {
source = "../../../modules/gcs"
project_id = module.land-project.project_id
project_id = module.drop-project.project_id
prefix = var.prefix
name = "lnd-cs-0"
name = "drp-cs-0"
location = var.location
storage_class = "MULTI_REGIONAL"
encryption_key = try(local.service_encryption_keys.storage, null)
@ -93,12 +93,12 @@ module "land-cs-0" {
# PubSub
module "land-sa-ps-0" {
module "drop-sa-ps-0" {
source = "../../../modules/iam-service-account"
project_id = module.land-project.project_id
project_id = module.drop-project.project_id
prefix = var.prefix
name = "lnd-ps-0"
display_name = "Data platform PubSub landing service account"
name = "drp-ps-0"
display_name = "Data platform PubSub drop off service account"
iam = {
"roles/iam.serviceAccountTokenCreator" = [
local.groups_iam.data-engineers
@ -106,30 +106,30 @@ module "land-sa-ps-0" {
}
}
module "land-ps-0" {
module "drop-ps-0" {
source = "../../../modules/pubsub"
project_id = module.land-project.project_id
name = "${var.prefix}-lnd-ps-0"
project_id = module.drop-project.project_id
name = "${var.prefix}-drp-ps-0"
kms_key = try(local.service_encryption_keys.pubsub, null)
}
# BigQuery
module "land-sa-bq-0" {
module "drop-sa-bq-0" {
source = "../../../modules/iam-service-account"
project_id = module.land-project.project_id
project_id = module.drop-project.project_id
prefix = var.prefix
name = "lnd-bq-0"
display_name = "Data platform BigQuery landing service account"
name = "drp-bq-0"
display_name = "Data platform BigQuery drop off service account"
iam = {
"roles/iam.serviceAccountTokenCreator" = [local.groups_iam.data-engineers]
}
}
module "land-bq-0" {
module "drop-bq-0" {
source = "../../../modules/bigquery-dataset"
project_id = module.land-project.project_id
id = "${replace(var.prefix, "-", "_")}lnd_bq_0"
project_id = module.drop-project.project_id
id = "${replace(var.prefix, "-", "_")}drp_bq_0"
location = var.location
encryption_key = try(local.service_encryption_keys.bq, null)
}

View File

@ -66,39 +66,39 @@ resource "google_composer_environment" "orch-cmp-0" {
image_version = var.composer_config.airflow_version
env_variables = merge(
var.composer_config.env_variables, {
BQ_LOCATION = var.location
DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}")
DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "")
DTL_L0_PRJ = module.lake-0-project.project_id
DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id
DTL_L0_GCS = module.lake-0-cs-0.url
DTL_L1_PRJ = module.lake-1-project.project_id
DTL_L1_BQ_DATASET = module.lake-1-bq-0.dataset_id
DTL_L1_GCS = module.lake-1-cs-0.url
DTL_L2_PRJ = module.lake-2-project.project_id
DTL_L2_BQ_DATASET = module.lake-2-bq-0.dataset_id
DTL_L2_GCS = module.lake-2-cs-0.url
DTL_PLG_PRJ = module.lake-plg-project.project_id
DTL_PLG_BQ_DATASET = module.lake-plg-bq-0.dataset_id
DTL_PLG_GCS = module.lake-plg-cs-0.url
GCP_REGION = var.region
LND_PRJ = module.land-project.project_id
LND_BQ = module.land-bq-0.dataset_id
LND_GCS = module.land-cs-0.url
LND_PS = module.land-ps-0.id
LOD_PRJ = module.load-project.project_id
LOD_GCS_STAGING = module.load-cs-df-0.url
LOD_NET_VPC = local.load_vpc
LOD_NET_SUBNET = local.load_subnet
LOD_SA_DF = module.load-sa-df-0.email
ORC_PRJ = module.orch-project.project_id
ORC_GCS = module.orch-cs-0.url
TRF_PRJ = module.transf-project.project_id
TRF_GCS_STAGING = module.transf-cs-df-0.url
TRF_NET_VPC = local.transf_vpc
TRF_NET_SUBNET = local.transf_subnet
TRF_SA_DF = module.transf-sa-df-0.email
TRF_SA_BQ = module.transf-sa-bq-0.email
BQ_LOCATION = var.location
DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}")
DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "")
DRP_PRJ = module.drop-project.project_id
DRP_BQ = module.drop-bq-0.dataset_id
DRP_GCS = module.drop-cs-0.url
DRP_PS = module.drop-ps-0.id
DWH_LAND_PRJ = module.dwh-lnd-project.project_id
DWH_LAND_BQ_DATASET = module.dwh-lnd-bq-0.dataset_id
DWH_LAND_GCS = module.dwh-lnd-cs-0.url
DWH_CURATED_PRJ = module.dwh-cur-project.project_id
DWH_CURATED_BQ_DATASET = module.dwh-cur-bq-0.dataset_id
DWH_CURATED_GCS = module.dwh-cur-cs-0.url
DWH_CONFIDENTIAL_PRJ = module.dwh-conf-project.project_id
DWH_CONFIDENTIAL_BQ_DATASET = module.dwh-conf-bq-0.dataset_id
DWH_CONFIDENTIAL_GCS = module.dwh-conf-cs-0.url
DWH_PLG_PRJ = module.dwh-plg-project.project_id
DWH_PLG_BQ_DATASET = module.dwh-plg-bq-0.dataset_id
DWH_PLG_GCS = module.dwh-plg-cs-0.url
GCP_REGION = var.region
LOD_PRJ = module.load-project.project_id
LOD_GCS_STAGING = module.load-cs-df-0.url
LOD_NET_VPC = local.load_vpc
LOD_NET_SUBNET = local.load_subnet
LOD_SA_DF = module.load-sa-df-0.email
ORC_PRJ = module.orch-project.project_id
ORC_GCS = module.orch-cs-0.url
TRF_PRJ = module.transf-project.project_id
TRF_GCS_STAGING = module.transf-cs-df-0.url
TRF_NET_VPC = local.transf_vpc
TRF_NET_SUBNET = local.transf_subnet
TRF_SA_DF = module.transf-sa-df-0.email
TRF_SA_BQ = module.transf-sa-bq-0.email
}
)
}

View File

@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# tfdoc:file:description Datalake projects.
# tfdoc:file:description Data Warehouse projects.
locals {
lake_group_iam = {
dwh_group_iam = {
(local.groups.data-engineers) = [
"roles/bigquery.dataEditor",
"roles/storage.admin",
@ -30,7 +30,7 @@ locals {
"roles/storage.objectViewer",
]
}
lake_plg_group_iam = {
dwh_plg_group_iam = {
(local.groups.data-engineers) = [
"roles/bigquery.dataEditor",
"roles/storage.admin",
@ -45,7 +45,7 @@ locals {
"roles/storage.objectAdmin",
]
}
lake_0_iam = {
dwh_lnd_iam = {
"roles/bigquery.dataOwner" = [
module.load-sa-df-0.iam_email,
module.transf-sa-df-0.iam_email,
@ -61,7 +61,7 @@ locals {
module.load-sa-df-0.iam_email,
]
}
lake_iam = {
dwh_iam = {
"roles/bigquery.dataOwner" = [
module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email,
@ -79,7 +79,7 @@ locals {
module.transf-sa-df-0.iam_email,
]
}
lake_services = concat(var.project_services, [
dwh_services = concat(var.project_services, [
"bigquery.googleapis.com",
"bigqueryreservation.googleapis.com",
"bigquerystorage.googleapis.com",
@ -95,60 +95,60 @@ locals {
# Project
module "lake-0-project" {
module "dwh-lnd-project" {
source = "../../../modules/project"
parent = var.folder_id
billing_account = var.billing_account_id
prefix = var.prefix
name = "dtl-0${local.project_suffix}"
group_iam = local.lake_group_iam
iam = local.lake_0_iam
services = local.lake_services
name = "dwh-lnd${local.project_suffix}"
group_iam = local.dwh_group_iam
iam = local.dwh_lnd_iam
services = local.dwh_services
service_encryption_key_ids = {
bq = [try(local.service_encryption_keys.bq, null)]
storage = [try(local.service_encryption_keys.storage, null)]
}
}
module "lake-1-project" {
module "dwh-cur-project" {
source = "../../../modules/project"
parent = var.folder_id
billing_account = var.billing_account_id
prefix = var.prefix
name = "dtl-1${local.project_suffix}"
group_iam = local.lake_group_iam
iam = local.lake_iam
services = local.lake_services
name = "dwh-cur${local.project_suffix}"
group_iam = local.dwh_group_iam
iam = local.dwh_iam
services = local.dwh_services
service_encryption_key_ids = {
bq = [try(local.service_encryption_keys.bq, null)]
storage = [try(local.service_encryption_keys.storage, null)]
}
}
module "lake-2-project" {
module "dwh-conf-project" {
source = "../../../modules/project"
parent = var.folder_id
billing_account = var.billing_account_id
prefix = var.prefix
name = "dtl-2${local.project_suffix}"
group_iam = local.lake_group_iam
iam = local.lake_iam
services = local.lake_services
name = "dwh-conf${local.project_suffix}"
group_iam = local.dwh_group_iam
iam = local.dwh_iam
services = local.dwh_services
service_encryption_key_ids = {
bq = [try(local.service_encryption_keys.bq, null)]
storage = [try(local.service_encryption_keys.storage, null)]
}
}
module "lake-plg-project" {
module "dwh-plg-project" {
source = "../../../modules/project"
parent = var.folder_id
billing_account = var.billing_account_id
prefix = var.prefix
name = "dtl-plg${local.project_suffix}"
group_iam = local.lake_plg_group_iam
name = "dwh-plg${local.project_suffix}"
group_iam = local.dwh_plg_group_iam
iam = {}
services = local.lake_services
services = local.dwh_services
service_encryption_key_ids = {
bq = [try(local.service_encryption_keys.bq, null)]
storage = [try(local.service_encryption_keys.storage, null)]
@ -157,78 +157,78 @@ module "lake-plg-project" {
# Bigquery
module "lake-0-bq-0" {
module "dwh-lnd-bq-0" {
source = "../../../modules/bigquery-dataset"
project_id = module.lake-0-project.project_id
id = "${replace(var.prefix, "-", "_")}_dtl_0_bq_0"
project_id = module.dwh-lnd-project.project_id
id = "${replace(var.prefix, "-", "_")}_dwh_lnd_bq_0"
location = var.location
encryption_key = try(local.service_encryption_keys.bq, null)
}
module "lake-1-bq-0" {
module "dwh-cur-bq-0" {
source = "../../../modules/bigquery-dataset"
project_id = module.lake-1-project.project_id
id = "${replace(var.prefix, "-", "_")}_dtl_1_bq_0"
project_id = module.dwh-cur-project.project_id
id = "${replace(var.prefix, "-", "_")}_dwh_lnd_bq_0"
location = var.location
encryption_key = try(local.service_encryption_keys.bq, null)
}
module "lake-2-bq-0" {
module "dwh-conf-bq-0" {
source = "../../../modules/bigquery-dataset"
project_id = module.lake-2-project.project_id
id = "${replace(var.prefix, "-", "_")}_dtl_2_bq_0"
project_id = module.dwh-conf-project.project_id
id = "${replace(var.prefix, "-", "_")}_dwh_conf_bq_0"
location = var.location
encryption_key = try(local.service_encryption_keys.bq, null)
}
module "lake-plg-bq-0" {
module "dwh-plg-bq-0" {
source = "../../../modules/bigquery-dataset"
project_id = module.lake-plg-project.project_id
id = "${replace(var.prefix, "-", "_")}_dtl_plg_bq_0"
project_id = module.dwh-plg-project.project_id
id = "${replace(var.prefix, "-", "_")}_dwh_plg_bq_0"
location = var.location
encryption_key = try(local.service_encryption_keys.bq, null)
}
# Cloud storage
module "lake-0-cs-0" {
module "dwh-lnd-cs-0" {
source = "../../../modules/gcs"
project_id = module.lake-0-project.project_id
project_id = module.dwh-lnd-project.project_id
prefix = var.prefix
name = "dtl-0-cs-0"
name = "dwh-lnd-cs-0"
location = var.location
storage_class = "MULTI_REGIONAL"
encryption_key = try(local.service_encryption_keys.storage, null)
force_destroy = var.data_force_destroy
}
module "lake-1-cs-0" {
module "dwh-cur-cs-0" {
source = "../../../modules/gcs"
project_id = module.lake-1-project.project_id
project_id = module.dwh-cur-project.project_id
prefix = var.prefix
name = "dtl-1-cs-0"
name = "dwh-cur-cs-0"
location = var.location
storage_class = "MULTI_REGIONAL"
encryption_key = try(local.service_encryption_keys.storage, null)
force_destroy = var.data_force_destroy
}
module "lake-2-cs-0" {
module "dwh-conf-cs-0" {
source = "../../../modules/gcs"
project_id = module.lake-2-project.project_id
project_id = module.dwh-conf-project.project_id
prefix = var.prefix
name = "dtl-2-cs-0"
name = "dwh-conf-cs-0"
location = var.location
storage_class = "MULTI_REGIONAL"
encryption_key = try(local.service_encryption_keys.storage, null)
force_destroy = var.data_force_destroy
}
module "lake-plg-cs-0" {
module "dwh-plg-cs-0" {
source = "../../../modules/gcs"
project_id = module.lake-plg-project.project_id
project_id = module.dwh-plg-project.project_id
prefix = var.prefix
name = "dtl-plg-cs-0"
name = "dwh-plg-cs-0"
location = var.location
storage_class = "MULTI_REGIONAL"
encryption_key = try(local.service_encryption_keys.storage, null)

View File

@ -13,7 +13,40 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
## Project <i>dtl-0</i>
## Project <i>drp</i>
| members | roles |
|---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/pubsub.editor](https://cloud.google.com/iam/docs/understanding-roles#pubsub.editor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>drp-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|<b>drp-cs-0</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|<b>drp-ps-0</b><br><small><i>serviceAccount</i></small>|[roles/pubsub.publisher](https://cloud.google.com/iam/docs/understanding-roles#pubsub.publisher) |
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dwh-conf</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dwh-cur</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dwh-lnd</i>
| members | roles |
|---|---|
@ -24,29 +57,7 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
## Project <i>dtl-1</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dtl-2</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dtl-plg</i>
## Project <i>dwh-plg</i>
| members | roles |
|---|---|
@ -54,17 +65,6 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
## Project <i>lnd</i>
| members | roles |
|---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/pubsub.editor](https://cloud.google.com/iam/docs/understanding-roles#pubsub.editor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>lnd-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|<b>lnd-cs-0</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|<b>lnd-ps-0</b><br><small><i>serviceAccount</i></small>|[roles/pubsub.publisher](https://cloud.google.com/iam/docs/understanding-roles#pubsub.publisher) |
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>lod</i>
| members | roles |

View File

@ -27,9 +27,9 @@ The code in this example doesn't address Organization-level configurations (Orga
The Data Platform is designed to rely on several projects, one project per data stage. The stages identified are:
- landing
- drop off
- load
- data lake
- data warehouse
- orchestration
- transformation
- exposure
@ -38,15 +38,15 @@ This separation into projects allows adhering to the least-privilege principle b
The script will create the following projects:
- **Landing** Used to store temporary data. Data is pushed to Cloud Storage, BigQuery, or Cloud PubSub. Resources are configured with a customizable lifecycle policy.
- **Load** Used to load data from landing to data lake. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended.
- **Data Lake** Several projects distributed across 3 separate layers, to host progressively processed and refined data:
- **L0 - Raw data** Structured Data, stored in relevant formats: structured data stored in BigQuery, unstructured data stored on Cloud Storage with additional metadata stored in BigQuery (for example pictures stored in Cloud Storage and analysis of the images for Cloud Vision API stored in BigQuery).
- **L1 - Cleansed, aggregated and standardized data**
- **L2 - Curated layer**
- **Playground** Temporary tables that Data Analyst may use to perform R&D on data available in other Data Lake layers.
- **Drop off** Used to store temporary data. Data is pushed to Cloud Storage, BigQuery, or Cloud PubSub. Resources are configured with a customizable lifecycle policy.
- **Load** Used to load data from the drop off zone to the data warehouse. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended.
- **Data Warehouse** Several projects distributed across 3 separate layers, to host progressively processed and refined data:
- **Landing - Raw data** Structured Data, stored in relevant formats: structured data stored in BigQuery, unstructured data stored on Cloud Storage with additional metadata stored in BigQuery (for example pictures stored in Cloud Storage and analysis of the images for Cloud Vision API stored in BigQuery).
- **Curated - Cleansed, aggregated and curated data**
- **Confidential - Curated and unencrypted layer**
- **Playground** Temporary tables that Data Analysts may use to perform R&D on data available in other Data Warehouse layers.
- **Orchestration** Used to host Cloud Composer, which orchestrates all tasks that move data across layers.
- **Transformation** Used to move data between Data Lake layers. We strongly suggest relying on BigQuery Engine to perform the transformations. If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII.
- **Transformation** Used to move data between Data Warehouse layers. We strongly suggest relying on BigQuery Engine to perform the transformations. If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII.
- **Exposure** Used to host resources that share processed data with external systems. Depending on the access pattern, data can be presented via Cloud SQL, BigQuery, or Bigtable. For BigQuery data, we strongly suggest relying on [Authorized views](https://cloud.google.com/bigquery/docs/authorized-views).
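For quick reference, the prefix mapping applied by this naming-convention change (as reflected in the Terraform diffs in this commit) can be summarized as follows. This locals block is a documentation aid only, not code that exists in the stage:

```hcl
# Reference mapping of old resource prefixes to the new naming convention.
locals {
  naming_map = {
    "lnd"     = "drp"      # landing              -> drop off
    "dtl-0"   = "dwh-lnd"  # data lake L0 (raw)   -> DWH landing
    "dtl-1"   = "dwh-cur"  # data lake L1         -> DWH curated
    "dtl-2"   = "dwh-conf" # data lake L2         -> DWH confidential
    "dtl-plg" = "dwh-plg"  # data lake playground -> DWH playground
  }
}
```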
### Roles
@ -57,9 +57,9 @@ We assign roles on resources at the project level, granting the appropriate role
Service account creation follows the least privilege principle, performing a single task which requires access to a defined set of resources. The table below shows a high level overview of roles for each service account on each data layer, using `READ` or `WRITE` access patterns for simplicity. For detailed roles please refer to the code.
|Service Account|Landing|DataLake L0|DataLake L1|DataLake L2|
|Service Account|Drop off|DWH Landing|DWH Curated|DWH Confidential|
|-|:-:|:-:|:-:|:-:|
|`landing-sa`|`WRITE`|-|-|-|
|`drop-sa`|`WRITE`|-|-|-|
|`load-sa`|`READ`|`READ`/`WRITE`|-|-|
|`transformation-sa`|-|`READ`/`WRITE`|`READ`/`WRITE`|`READ`/`WRITE`|
|`orchestration-sa`|-|-|-|-|
@ -75,12 +75,12 @@ User groups provide a stable frame of reference that allows decoupling the final
We use three groups to control access to resources:
- *Data Engineers*. They handle and run the Data Hub, with read access to all resources in order to troubleshoot possible issues with pipelines. This team can also impersonate any service account.
- *Data Analysts*. They perform analysis on datasets, with read access to the data lake L2 project, and BigQuery READ/WRITE access to the playground project.
- *Data Analysts*. They perform analysis on datasets, with read access to the Data Warehouse Confidential project, and BigQuery READ/WRITE access to the playground project.
- *Data Security*. They handle security configurations related to the Data Hub. This team has admin access to the common project to configure Cloud DLP templates or Data Catalog policy tags.
The table below shows a high level overview of roles for each group on each project, using `READ`, `WRITE` and `ADMIN` access patterns for simplicity. For detailed roles please refer to the code.
|Group|Landing|Load|Transformation|Data Lake L0|Data Lake L1|Data Lake L2|Data Lake Playground|Orchestration|Common|
|Group|Drop off|Load|Transformation|DWH Landing|DWH Curated|DWH Confidential|DWH Playground|Orchestration|Common|
|-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|
|Data Analysts|-|-|-|-|-|`READ`|`READ`/`WRITE`|-|-|
@ -215,12 +215,12 @@ To create Cloud Key Management keys in the Data Platform you can uncomment the C
### Assign roles at BQ Dataset level
To handle multiple groups of `data-analysts` accessing the same Data Lake layer projects but only to the dataset belonging to a specific group, you may want to assign roles at BigQuery dataset level instead of at project-level.
To allow multiple groups of `data-analysts` to access the same Data Warehouse layer projects while restricting each group to its own dataset, you may want to assign roles at the BigQuery dataset level instead of at the project level.
To do this, you need to remove the project-level IAM binding for the `data-analysts` group and grant roles at the BigQuery dataset level using the `iam` variable on the `bigquery-dataset` modules, as sketched below.
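A minimal sketch of this approach, reusing the `bigquery-dataset` module interface shown elsewhere in this stage; the extra dataset name and the analyst group email are placeholders, not resources defined by this commit:

```hcl
module "dwh-conf-bq-finance" {
  source         = "../../../modules/bigquery-dataset"
  project_id     = module.dwh-conf-project.project_id
  id             = "${replace(var.prefix, "-", "_")}_dwh_conf_bq_finance"
  location       = var.location
  encryption_key = try(local.service_encryption_keys.bq, null)
  iam = {
    # Dataset-level grant replacing the project-level binding for this group.
    "roles/bigquery.dataViewer" = [
      "group:gcp-data-analysts-finance@example.com" # placeholder group
    ]
  }
}
```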
## Demo pipeline
The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features.
The application layer is out of scope of this script. For demo purposes only, several Cloud Composer DAGs are provided. Demos will import data from the `drop off` area to the `Data Warehouse Confidential` dataset using different features.
You can find examples in the [demo](./demo) folder.

View File

@ -8,7 +8,7 @@ The example is not intended to be a production-ready code.
The demo imports purchase data generated by a store.
## Input files
Data are uploaded to the `landing` GCS bucket. File structure:
Data are uploaded to the `drop off` GCS bucket. File structure:
- `customers.csv`: Comma-separated values with customer information in the following format: Customer ID, Name, Surname, Registration Timestamp
- `purchases.csv`: Comma-separated values with purchase information in the following format: Item ID, Customer ID, Item, Item price, Purchase Timestamp
@ -16,14 +16,14 @@ Data are uploaded to the `landing` GCS bucket. File structure:
Different data pipelines are provided to highlight different features and patterns. For the purpose of the example, a single pipeline handles all data lifecycles. When adapting them to your real use case, you may want to evaluate the option of handling each functional step in a separate pipeline or with a dedicated tool. For example, you may want to use `Dataform` to handle the data schema lifecycle.
Below you can find a description of each example:
- Simple import data: [`datapipeline.py`](./datapipeline.py) is a simple pipeline to import provided data from the `landing` Google Cloud Storage bucket to the Data Hub L2 layer joining `customers` and `purchases` tables into `customerpurchase` table.
- Import data with Policy Tags: [`datapipeline_dc_tags.py`](./datapipeline.py) imports provided data from `landing` bucket to the Data Hub L2 layer protecting sensitive data using Data Catalog policy Tags.
- Simple import data: [`datapipeline.py`](./datapipeline.py) is a simple pipeline to import provided data from the `drop off` Google Cloud Storage bucket to the Data Hub Confidential layer joining `customers` and `purchases` tables into `customerpurchase` table.
- Import data with Policy Tags: [`datapipeline_dc_tags.py`](./datapipeline_dc_tags.py) imports provided data from the `drop off` bucket to the Data Hub Confidential layer, protecting sensitive data using Data Catalog policy tags.
- Delete tables: [`delete_table.py`](./delete_table.py) deletes BigQuery tables created by import pipelines.
## Running the demo
To run the demo examples, follow these steps:
- 01: copy sample data to the `landing` Cloud Storage bucket impersonating the `load` service account.
- 01: copy sample data to the `drop off` Cloud Storage bucket impersonating the `load` service account.
- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account.
- 04: Open the Cloud Composer Airflow UI and run the imported DAG.

View File

@ -34,23 +34,23 @@ from airflow.utils.task_group import TaskGroup
# --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION")
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
DWH_LAND_PRJ = os.environ.get("DWH_LAND_PRJ")
DWH_LAND_BQ_DATASET = os.environ.get("DWH_LAND_BQ_DATASET")
DWH_LAND_GCS = os.environ.get("DWH_LAND_GCS")
DWH_CURATED_PRJ = os.environ.get("DWH_CURATED_PRJ")
DWH_CURATED_BQ_DATASET = os.environ.get("DWH_CURATED_BQ_DATASET")
DWH_CURATED_GCS = os.environ.get("DWH_CURATED_GCS")
DWH_CONFIDENTIAL_PRJ = os.environ.get("DWH_CONFIDENTIAL_PRJ")
DWH_CONFIDENTIAL_BQ_DATASET = os.environ.get("DWH_CONFIDENTIAL_BQ_DATASET")
DWH_CONFIDENTIAL_GCS = os.environ.get("DWH_CONFIDENTIAL_GCS")
DWH_PLG_PRJ = os.environ.get("DWH_PLG_PRJ")
DWH_PLG_BQ_DATASET = os.environ.get("DWH_PLG_BQ_DATASET")
DWH_PLG_GCS = os.environ.get("DWH_PLG_GCS")
GCP_REGION = os.environ.get("GCP_REGION")
LND_PRJ = os.environ.get("LND_PRJ")
LND_BQ = os.environ.get("LND_BQ")
LND_GCS = os.environ.get("LND_GCS")
LND_PS = os.environ.get("LND_PS")
DRP_PRJ = os.environ.get("DRP_PRJ")
DRP_BQ = os.environ.get("DRP_BQ")
DRP_GCS = os.environ.get("DRP_GCS")
DRP_PS = os.environ.get("DRP_PS")
LOD_PRJ = os.environ.get("LOD_PRJ")
LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
@ -127,8 +127,8 @@ with models.DAG(
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/customers_schema.json",
"javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js",
"inputFilePattern": LND_GCS + "/customers.csv",
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".customers",
"inputFilePattern": DRP_GCS + "/customers.csv",
"outputTable": DWH_LAND_PRJ + ":" + DWH_LAND_BQ_DATASET + ".customers",
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
},
)
@ -142,8 +142,8 @@ with models.DAG(
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/purchases_schema.json",
"javascriptTextTransformGcsPath": ORC_GCS + "/purchases_udf.js",
"inputFilePattern": LND_GCS + "/purchases.csv",
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".purchases",
"inputFilePattern": DRP_GCS + "/purchases.csv",
"outputTable": DWH_LAND_PRJ + ":" + DWH_LAND_BQ_DATASET + ".purchases",
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
},
)
@ -159,17 +159,15 @@ with models.DAG(
'query':"""SELECT
c.id as customer_id,
p.id as purchase_id,
c.name as name,
c.surname as surname,
p.item as item,
p.price as price,
p.timestamp as timestamp
FROM `{dtl_0_prj}.{dtl_0_dataset}.customers` c
JOIN `{dtl_0_prj}.{dtl_0_dataset}.purchases` p ON c.id = p.customer_id
""".format(dtl_0_prj=DTL_L0_PRJ, dtl_0_dataset=DTL_L0_BQ_DATASET, ),
FROM `{dwh_0_prj}.{dwh_0_dataset}.customers` c
JOIN `{dwh_0_prj}.{dwh_0_dataset}.purchases` p ON c.id = p.customer_id
""".format(dwh_0_prj=DWH_LAND_PRJ, dwh_0_dataset=DWH_LAND_BQ_DATASET, ),
'destinationTable':{
'projectId': DTL_L1_PRJ,
'datasetId': DTL_L1_BQ_DATASET,
'projectId': DWH_CURATED_PRJ,
'datasetId': DWH_CURATED_BQ_DATASET,
'tableId': 'customer_purchase'
},
'writeDisposition':'WRITE_TRUNCATE',
@ -179,8 +177,8 @@ with models.DAG(
impersonation_chain=[TRF_SA_BQ]
)
l2_customer_purchase = BigQueryInsertJobOperator(
task_id='bq_l2_customer_purchase',
confidential_customer_purchase = BigQueryInsertJobOperator(
task_id='bq_confidential_customer_purchase',
gcp_conn_id='bigquery_default',
project_id=TRF_PRJ,
location=BQ_LOCATION,
@ -188,18 +186,19 @@ with models.DAG(
'jobType':'QUERY',
'query':{
'query':"""SELECT
customer_id,
purchase_id,
name,
surname,
item,
price,
timestamp
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
c.id as customer_id,
p.id as purchase_id,
c.name as name,
c.surname as surname,
p.item as item,
p.price as price,
p.timestamp as timestamp
FROM `{dwh_0_prj}.{dwh_0_dataset}.customers` c
JOIN `{dwh_0_prj}.{dwh_0_dataset}.purchases` p ON c.id = p.customer_id
""".format(dwh_0_prj=DWH_LAND_PRJ, dwh_0_dataset=DWH_LAND_BQ_DATASET, ),
'destinationTable':{
'projectId': DTL_L2_PRJ,
'datasetId': DTL_L2_BQ_DATASET,
'projectId': DWH_CONFIDENTIAL_PRJ,
'datasetId': DWH_CONFIDENTIAL_BQ_DATASET,
'tableId': 'customer_purchase'
},
'writeDisposition':'WRITE_TRUNCATE',
@ -209,4 +208,4 @@ with models.DAG(
impersonation_chain=[TRF_SA_BQ]
)
start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end
start >> [customers_import, purchases_import] >> join_customer_purchase >> confidential_customer_purchase >> end

View File

@ -34,23 +34,23 @@ from airflow.utils.task_group import TaskGroup
# --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION")
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
DWH_LAND_PRJ = os.environ.get("DWH_LAND_PRJ")
DWH_LAND_BQ_DATASET = os.environ.get("DWH_LAND_BQ_DATASET")
DWH_LAND_GCS = os.environ.get("DWH_LAND_GCS")
DWH_CURATED_PRJ = os.environ.get("DWH_CURATED_PRJ")
DWH_CURATED_BQ_DATASET = os.environ.get("DWH_CURATED_BQ_DATASET")
DWH_CURATED_GCS = os.environ.get("DWH_CURATED_GCS")
DWH_CONFIDENTIAL_PRJ = os.environ.get("DWH_CONFIDENTIAL_PRJ")
DWH_CONFIDENTIAL_BQ_DATASET = os.environ.get("DWH_CONFIDENTIAL_BQ_DATASET")
DWH_CONFIDENTIAL_GCS = os.environ.get("DWH_CONFIDENTIAL_GCS")
DWH_PLG_PRJ = os.environ.get("DWH_PLG_PRJ")
DWH_PLG_BQ_DATASET = os.environ.get("DWH_PLG_BQ_DATASET")
DWH_PLG_GCS = os.environ.get("DWH_PLG_GCS")
GCP_REGION = os.environ.get("GCP_REGION")
LND_PRJ = os.environ.get("LND_PRJ")
LND_BQ = os.environ.get("LND_BQ")
LND_GCS = os.environ.get("LND_GCS")
LND_PS = os.environ.get("LND_PS")
DRP_PRJ = os.environ.get("DRP_PRJ")
DRP_BQ = os.environ.get("DRP_BQ")
DRP_GCS = os.environ.get("DRP_GCS")
DRP_PS = os.environ.get("DRP_PS")
LOD_PRJ = os.environ.get("LOD_PRJ")
LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
@ -121,8 +121,8 @@ with models.DAG(
with TaskGroup('upsert_table') as upsert_table:
upsert_table_customers = BigQueryUpsertTableOperator(
task_id="upsert_table_customers",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
project_id=DWH_LAND_PRJ,
dataset_id=DWH_LAND_BQ_DATASET,
impersonation_chain=[TRF_SA_DF],
table_resource={
"tableReference": {"tableId": "customers"},
@ -131,28 +131,28 @@ with models.DAG(
upsert_table_purchases = BigQueryUpsertTableOperator(
task_id="upsert_table_purchases",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
project_id=DWH_LAND_PRJ,
dataset_id=DWH_LAND_BQ_DATASET,
impersonation_chain=[TRF_SA_BQ],
table_resource={
"tableReference": {"tableId": "purchases"}
},
)
upsert_table_customer_purchase_l1 = BigQueryUpsertTableOperator(
task_id="upsert_table_customer_purchase_l1",
project_id=DTL_L1_PRJ,
dataset_id=DTL_L1_BQ_DATASET,
upsert_table_customer_purchase_curated = BigQueryUpsertTableOperator(
task_id="upsert_table_customer_purchase_curated",
project_id=DWH_CURATED_PRJ,
dataset_id=DWH_CURATED_BQ_DATASET,
impersonation_chain=[TRF_SA_BQ],
table_resource={
"tableReference": {"tableId": "customer_purchase"}
},
)
upsert_table_customer_purchase_l2 = BigQueryUpsertTableOperator(
task_id="upsert_table_customer_purchase_l2",
project_id=DTL_L2_PRJ,
dataset_id=DTL_L2_BQ_DATASET,
upsert_table_customer_purchase_confidential = BigQueryUpsertTableOperator(
task_id="upsert_table_customer_purchase_confidential",
project_id=DWH_CONFIDENTIAL_PRJ,
dataset_id=DWH_CONFIDENTIAL_BQ_DATASET,
impersonation_chain=[TRF_SA_BQ],
table_resource={
"tableReference": {"tableId": "customer_purchase"}
@ -164,8 +164,8 @@ with models.DAG(
with TaskGroup('update_schema_table') as update_schema_table:
update_table_schema_customers = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customers",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
project_id=DWH_LAND_PRJ,
dataset_id=DWH_LAND_BQ_DATASET,
table_id="customers",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
@ -179,8 +179,8 @@ with models.DAG(
update_table_schema_customers = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_purchases",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
project_id=DWH_LAND_PRJ,
dataset_id=DWH_LAND_BQ_DATASET,
table_id="purchases",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
@ -193,10 +193,10 @@ with models.DAG(
]
)
update_table_schema_customer_purchase_l1 = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customer_purchase_l1",
project_id=DTL_L1_PRJ,
dataset_id=DTL_L1_BQ_DATASET,
update_table_schema_customer_purchase_curated = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customer_purchase_curated",
project_id=DWH_CURATED_PRJ,
dataset_id=DWH_CURATED_BQ_DATASET,
table_id="customer_purchase",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
@ -211,10 +211,10 @@ with models.DAG(
]
)
update_table_schema_customer_purchase_l2 = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customer_purchase_l2",
project_id=DTL_L2_PRJ,
dataset_id=DTL_L2_BQ_DATASET,
update_table_schema_customer_purchase_confidential = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customer_purchase_confidential",
project_id=DWH_CONFIDENTIAL_PRJ,
dataset_id=DWH_CONFIDENTIAL_BQ_DATASET,
table_id="customer_purchase",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
@ -238,8 +238,8 @@ with models.DAG(
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/customers_schema.json",
"javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js",
"inputFilePattern": LND_GCS + "/customers.csv",
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".customers",
"inputFilePattern": DRP_GCS + "/customers.csv",
"outputTable": DWH_LAND_PRJ + ":" + DWH_LAND_BQ_DATASET + ".customers",
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
},
)
@ -253,8 +253,8 @@ with models.DAG(
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/purchases_schema.json",
"javascriptTextTransformGcsPath": ORC_GCS + "/purchases_udf.js",
"inputFilePattern": LND_GCS + "/purchases.csv",
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".purchases",
"inputFilePattern": DRP_GCS + "/purchases.csv",
"outputTable": DWH_LAND_PRJ + ":" + DWH_LAND_BQ_DATASET + ".purchases",
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
},
)
@ -275,12 +275,12 @@ with models.DAG(
p.item as item,
p.price as price,
p.timestamp as timestamp
FROM `{dtl_0_prj}.{dtl_0_dataset}.customers` c
JOIN `{dtl_0_prj}.{dtl_0_dataset}.purchases` p ON c.id = p.customer_id
""".format(dtl_0_prj=DTL_L0_PRJ, dtl_0_dataset=DTL_L0_BQ_DATASET, ),
FROM `{dwh_0_prj}.{dwh_0_dataset}.customers` c
JOIN `{dwh_0_prj}.{dwh_0_dataset}.purchases` p ON c.id = p.customer_id
""".format(dwh_0_prj=DWH_LAND_PRJ, dwh_0_dataset=DWH_LAND_BQ_DATASET, ),
'destinationTable':{
'projectId': DTL_L1_PRJ,
'datasetId': DTL_L1_BQ_DATASET,
'projectId': DWH_CURATED_PRJ,
'datasetId': DWH_CURATED_BQ_DATASET,
'tableId': 'customer_purchase'
},
'writeDisposition':'WRITE_TRUNCATE',
@ -290,8 +290,8 @@ with models.DAG(
impersonation_chain=[TRF_SA_BQ]
)
l2_customer_purchase = BigQueryInsertJobOperator(
task_id='bq_l2_customer_purchase',
confidential_customer_purchase = BigQueryInsertJobOperator(
task_id='bq_confidential_customer_purchase',
gcp_conn_id='bigquery_default',
project_id=TRF_PRJ,
location=BQ_LOCATION,
@ -306,11 +306,11 @@ with models.DAG(
item,
price,
timestamp
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
FROM `{dwh_cur_prj}.{dwh_cur_dataset}.customer_purchase`
""".format(dwh_cur_prj=DWH_CURATED_PRJ, dwh_cur_dataset=DWH_CURATED_BQ_DATASET, ),
'destinationTable':{
'projectId': DTL_L2_PRJ,
'datasetId': DTL_L2_BQ_DATASET,
'projectId': DWH_CONFIDENTIAL_PRJ,
'datasetId': DWH_CONFIDENTIAL_BQ_DATASET,
'tableId': 'customer_purchase'
},
'writeDisposition':'WRITE_TRUNCATE',
@ -319,4 +319,4 @@ with models.DAG(
},
impersonation_chain=[TRF_SA_BQ]
)
start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end
start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> confidential_customer_purchase >> end

View File

@ -34,23 +34,23 @@ from airflow.utils.task_group import TaskGroup
# --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION")
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
DWH_LAND_PRJ = os.environ.get("DWH_LAND_PRJ")
DWH_LAND_BQ_DATASET = os.environ.get("DWH_LAND_BQ_DATASET")
DWH_LAND_GCS = os.environ.get("DWH_LAND_GCS")
DWH_CURATED_PRJ = os.environ.get("DWH_CURATED_PRJ")
DWH_CURATED_BQ_DATASET = os.environ.get("DWH_CURATED_BQ_DATASET")
DWH_CURATED_GCS = os.environ.get("DWH_CURATED_GCS")
DWH_CONFIDENTIAL_PRJ = os.environ.get("DWH_CONFIDENTIAL_PRJ")
DWH_CONFIDENTIAL_BQ_DATASET = os.environ.get("DWH_CONFIDENTIAL_BQ_DATASET")
DWH_CONFIDENTIAL_GCS = os.environ.get("DWH_CONFIDENTIAL_GCS")
DWH_PLG_PRJ = os.environ.get("DWH_PLG_PRJ")
DWH_PLG_BQ_DATASET = os.environ.get("DWH_PLG_BQ_DATASET")
DWH_PLG_GCS = os.environ.get("DWH_PLG_GCS")
GCP_REGION = os.environ.get("GCP_REGION")
LND_PRJ = os.environ.get("LND_PRJ")
LND_BQ = os.environ.get("LND_BQ")
LND_GCS = os.environ.get("LND_GCS")
LND_PS = os.environ.get("LND_PS")
DRP_PRJ = os.environ.get("DRP_PRJ")
DRP_BQ = os.environ.get("DRP_BQ")
DRP_GCS = os.environ.get("DRP_GCS")
DRP_PS = os.environ.get("DRP_PS")
LOD_PRJ = os.environ.get("LOD_PRJ")
LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
@ -121,25 +121,25 @@ with models.DAG(
with TaskGroup('delete_table') as delte_table:
delete_table_customers = BigQueryDeleteTableOperator(
task_id="delete_table_customers",
deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".customers",
deletion_dataset_table=DWH_LAND_PRJ+"."+DWH_LAND_BQ_DATASET+".customers",
impersonation_chain=[TRF_SA_DF]
)
delete_table_purchases = BigQueryDeleteTableOperator(
task_id="delete_table_purchases",
deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".purchases",
deletion_dataset_table=DWH_LAND_PRJ+"."+DWH_LAND_BQ_DATASET+".purchases",
impersonation_chain=[TRF_SA_DF]
)
delete_table_customer_purchase_l1 = BigQueryDeleteTableOperator(
task_id="delete_table_customer_purchase_l1",
deletion_dataset_table=DTL_L1_PRJ+"."+DTL_L1_BQ_DATASET+".customer_purchase",
delete_table_customer_purchase_curated = BigQueryDeleteTableOperator(
task_id="delete_table_customer_purchase_curated",
deletion_dataset_table=DWH_CURATED_PRJ+"."+DWH_CURATED_BQ_DATASET+".customer_purchase",
impersonation_chain=[TRF_SA_DF]
)
delete_table_customer_purchase_l2 = BigQueryDeleteTableOperator(
task_id="delete_table_customer_purchase_l2",
deletion_dataset_table=DTL_L2_PRJ+"."+DTL_L2_BQ_DATASET+".customer_purchase",
delete_table_customer_purchase_confidential = BigQueryDeleteTableOperator(
task_id="delete_table_customer_purchase_confidential",
deletion_dataset_table=DWH_CONFIDENTIAL_PRJ+"."+DWH_CONFIDENTIAL_BQ_DATASET+".customer_purchase",
impersonation_chain=[TRF_SA_DF]
)

Binary file not shown (image updated: 70 KiB before, 50 KiB after).

View File

@ -17,25 +17,25 @@
output "bigquery-datasets" {
description = "BigQuery datasets."
value = {
land-bq-0 = module.land-bq-0.dataset_id,
lake-0-bq-0 = module.lake-0-bq-0.dataset_id,
lake-1-bq-0 = module.lake-1-bq-0.dataset_id,
lake-2-bq-0 = module.lake-2-bq-0.dataset_id,
lake-plg-bq-0 = module.lake-plg-bq-0.dataset_id,
drop-bq-0 = module.drop-bq-0.dataset_id,
dwh-landing-bq-0 = module.dwh-lnd-bq-0.dataset_id,
dwh-curated-bq-0 = module.dwh-cur-bq-0.dataset_id,
dwh-confidential-bq-0 = module.dwh-conf-bq-0.dataset_id,
dwh-plg-bq-0 = module.dwh-plg-bq-0.dataset_id,
}
}
output "gcs-buckets" {
description = "GCS buckets."
value = {
lake-0-cs-0 = module.lake-0-cs-0.name,
lake-1-cs-0 = module.lake-1-cs-0.name,
lake-2-cs-0 = module.lake-2-cs-0.name,
lake-plg-cs-0 = module.lake-plg-cs-0.name,
land-cs-0 = module.land-cs-0.name,
lod-cs-df = module.load-cs-df-0.name,
orch-cs-0 = module.orch-cs-0.name,
transf-cs-df = module.transf-cs-df-0.name,
dwh-landing-cs-0 = module.dwh-lnd-cs-0.name,
dwh-curated-cs-0 = module.dwh-cur-cs-0.name,
dwh-confidential-cs-0 = module.dwh-conf-cs-0.name,
dwh-plg-cs-0 = module.dwh-plg-cs-0.name,
drop-cs-0 = module.drop-cs-0.name,
lod-cs-df = module.load-cs-df-0.name,
orch-cs-0 = module.orch-cs-0.name,
transf-cs-df = module.transf-cs-df-0.name,
}
}
@ -48,26 +48,26 @@ output "projects" {
description = "GCP Projects informations."
value = {
project_number = {
lake-0 = module.lake-0-project.number,
lake-1 = module.lake-1-project.number,
lake-2 = module.lake-2-project.number,
lake-plg = module.lake-plg-project.number,
exposure = module.exp-project.number,
landing = module.land-project.number,
load = module.load-project.number,
orchestration = module.orch-project.number,
transformation = module.transf-project.number,
dwh-landing = module.dwh-lnd-project.number,
dwh-curated = module.dwh-cur-project.number,
dwh-confidential = module.dwh-conf-project.number,
dwh-plg = module.dwh-plg-project.number,
exposure = module.exp-project.number,
dropoff = module.drop-project.number,
load = module.load-project.number,
orchestration = module.orch-project.number,
transformation = module.transf-project.number,
}
project_id = {
lake-0 = module.lake-0-project.project_id,
lake-1 = module.lake-1-project.project_id,
lake-2 = module.lake-2-project.project_id,
lake-plg = module.lake-plg-project.project_id,
exposure = module.exp-project.project_id,
landing = module.land-project.project_id,
load = module.load-project.project_id,
orchestration = module.orch-project.project_id,
transformation = module.transf-project.project_id,
dwh-landing = module.dwh-lnd-project.project_id,
dwh-curated = module.dwh-cur-project.project_id,
dwh-confidential = module.dwh-conf-project.project_id,
dwh-plg = module.dwh-plg-project.project_id,
exposure = module.exp-project.project_id,
dropoff = module.drop-project.project_id,
load = module.load-project.project_id,
orchestration = module.orch-project.project_id,
transformation = module.transf-project.project_id,
}
}
}
@ -93,12 +93,12 @@ output "vpc_subnet" {
output "demo_commands" {
description = "Demo commands."
value = {
01 = "gsutil -i ${module.land-sa-cs-0.email} cp demo/data/*.csv gs://${module.land-cs-0.name}"
01 = "gsutil -i ${module.drop-sa-cs-0.email} cp demo/data/*.csv gs://${module.drop-cs-0.name}"
02 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/data/*.j* gs://${module.orch-cs-0.name}"
03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/"
04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG."
05 = <<EOT
bq query --project_id=${module.lake-2-project.project_id} --use_legacy_sql=false 'SELECT * EXCEPT (name, surname) FROM `${module.lake-2-project.project_id}.${module.lake-2-bq-0.dataset_id}.customer_purchase` LIMIT 1000'"
bq query --project_id=${module.dwh-conf-project.project_id} --use_legacy_sql=false 'SELECT * EXCEPT (name, surname) FROM `${module.dwh-conf-project.project_id}.${module.dwh-conf-bq-0.dataset_id}.customer_purchase` LIMIT 1000'
EOT
}
}

File diff suppressed because it is too large.

View File

@ -31,10 +31,10 @@ The Data Platform manages:
As per our GCP best practices the Data Platform relies on user groups to assign roles to human identities. These are the specific groups used by the Data Platform and their access patterns, from the [module documentation](../../../../examples/data-solutions/data-platform-foundations/#groups):
- *Data Engineers*. They handle and run the Data Hub, with read access to all resources in order to troubleshoot possible issues with pipelines. This team can also impersonate any service account.
- *Data Analysts*. They perform analysis on datasets, with read access to the data lake L2 project, and BigQuery READ/WRITE access to the playground project.
- *Data Analysts*. They perform analysis on datasets, with read access to the data warehouse Curated or Confidential projects depending on their privileges, and BigQuery READ/WRITE access to the playground project.
- *Data Security*. They handle security configurations related to the Data Hub. This team has admin access to the common project to configure Cloud DLP templates or Data Catalog policy tags.
|Group|Landing|Load|Transformation|Data Lake L0|Data Lake L1|Data Lake L2|Data Lake Playground|Orchestration|Common|
|Group|Landing|Load|Transformation|Data Warehouse Landing|Data Warehouse Curated|Data Warehouse Confidential|Data Warehouse Playground|Orchestration|Common|
|-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|
|Data Analysts|-|-|-|-|-|`READ`|`READ`/`WRITE`|-|-|
@ -69,6 +69,12 @@ As is often the case in real-world configurations, [VPC-SC](https://cloud.google
To configure the use of VPC-SC on the data platform, you have to specify the data platform project numbers on the `vpc_sc_perimeter_projects.dev` variable on [FAST security stage](../../02-security#perimeter-resources).
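As an illustration only (the exact variable shape is defined by the 02-security stage, and the project numbers below are placeholders), the corresponding tfvars entry might look like this:

```hcl
# 02-security stage terraform.tfvars (illustrative sketch)
vpc_sc_perimeter_projects = {
  dev = [
    "projects/1111111111", # drop off
    "projects/2222222222", # load
    "projects/3333333333", # DWH confidential
  ]
}
```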
If your Data Warehouse needs to handle confidential data and you have a requirement to separate it strictly from other data, and IAM alone is not enough, the suggested configuration is to keep the confidential project in a separate VPC-SC perimeter with the ingress/egress rules needed by the load and transformation service accounts. Below you can find a high-level diagram describing the configuration.
<p align="center">
<img src="diagram_vpcsc.png" alt="Data Platform VPC-SC diagram">
</p>
## How to run this stage
This stage can be run in isolation by providing the necessary variables, but it's really meant to be used as part of the FAST flow after the "foundational stages" ([`00-bootstrap`](../../00-bootstrap), [`01-resman`](../../01-resman), [`02-networking`](../../02-networking-vpn) and [`02-security`](../../02-security)).
@ -131,7 +137,7 @@ terraform apply
## Demo pipeline
The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features.
The application layer is out of scope of this script. For demo purposes only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `Data Warehouse Confidential` dataset using different features.
You can find examples in the [demo](../../../../examples/data-solutions/data-platform-foundations/demo) folder.

Binary file not shown (image updated: 115 KiB before, 58 KiB after).

Binary file not shown (new image added: 34 KiB).