diff --git a/examples/data-solutions/data-platform-foundations/01-landing.tf b/examples/data-solutions/data-platform-foundations/01-dropoff.tf similarity index 70% rename from examples/data-solutions/data-platform-foundations/01-landing.tf rename to examples/data-solutions/data-platform-foundations/01-dropoff.tf index 49f32aff..9a07b517 100644 --- a/examples/data-solutions/data-platform-foundations/01-landing.tf +++ b/examples/data-solutions/data-platform-foundations/01-dropoff.tf @@ -12,20 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -# tfdoc:file:description land project and resources. +# tfdoc:file:description drop off project and resources. locals { - land_orch_service_accounts = [ + drop_orch_service_accounts = [ module.load-sa-df-0.iam_email, module.orch-sa-cmp-0.iam_email ] } -module "land-project" { +module "drop-project" { source = "../../../modules/project" parent = var.folder_id billing_account = var.billing_account_id prefix = var.prefix - name = "lnd${local.project_suffix}" + name = "drp${local.project_suffix}" group_iam = { (local.groups.data-engineers) = [ "roles/bigquery.dataEditor", @@ -34,14 +34,14 @@ module "land-project" { ] } iam = { - "roles/bigquery.dataEditor" = [module.land-sa-bq-0.iam_email] + "roles/bigquery.dataEditor" = [module.drop-sa-bq-0.iam_email] "roles/bigquery.user" = [module.load-sa-df-0.iam_email] - "roles/pubsub.publisher" = [module.land-sa-ps-0.iam_email] + "roles/pubsub.publisher" = [module.drop-sa-ps-0.iam_email] "roles/pubsub.subscriber" = concat( - local.land_orch_service_accounts, [module.load-sa-df-0.iam_email] + local.drop_orch_service_accounts, [module.load-sa-df-0.iam_email] ) "roles/storage.objectAdmin" = [module.load-sa-df-0.iam_email] - "roles/storage.objectCreator" = [module.land-sa-cs-0.iam_email] + "roles/storage.objectCreator" = [module.drop-sa-cs-0.iam_email] "roles/storage.objectViewer" = [module.orch-sa-cmp-0.iam_email] "roles/storage.admin" = [module.load-sa-df-0.iam_email] } @@ -63,12 +63,12 @@ module "land-project" { # Cloud Storage -module "land-sa-cs-0" { +module "drop-sa-cs-0" { source = "../../../modules/iam-service-account" - project_id = module.land-project.project_id + project_id = module.drop-project.project_id prefix = var.prefix - name = "lnd-cs-0" - display_name = "Data platform GCS landing service account." + name = "drp-cs-0" + display_name = "Data platform GCS drop off service account." 
iam = { "roles/iam.serviceAccountTokenCreator" = [ local.groups_iam.data-engineers @@ -76,11 +76,11 @@ module "land-sa-cs-0" { } } -module "land-cs-0" { +module "drop-cs-0" { source = "../../../modules/gcs" - project_id = module.land-project.project_id + project_id = module.drop-project.project_id prefix = var.prefix - name = "lnd-cs-0" + name = "drp-cs-0" location = var.location storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) @@ -93,12 +93,12 @@ module "land-cs-0" { # PubSub -module "land-sa-ps-0" { +module "drop-sa-ps-0" { source = "../../../modules/iam-service-account" - project_id = module.land-project.project_id + project_id = module.drop-project.project_id prefix = var.prefix - name = "lnd-ps-0" - display_name = "Data platform PubSub landing service account" + name = "drp-ps-0" + display_name = "Data platform PubSub drop off service account" iam = { "roles/iam.serviceAccountTokenCreator" = [ local.groups_iam.data-engineers @@ -106,30 +106,30 @@ module "land-sa-ps-0" { } } -module "land-ps-0" { +module "drop-ps-0" { source = "../../../modules/pubsub" - project_id = module.land-project.project_id - name = "${var.prefix}-lnd-ps-0" + project_id = module.drop-project.project_id + name = "${var.prefix}-drp-ps-0" kms_key = try(local.service_encryption_keys.pubsub, null) } # BigQuery -module "land-sa-bq-0" { +module "drop-sa-bq-0" { source = "../../../modules/iam-service-account" - project_id = module.land-project.project_id + project_id = module.drop-project.project_id prefix = var.prefix - name = "lnd-bq-0" - display_name = "Data platform BigQuery landing service account" + name = "drp-bq-0" + display_name = "Data platform BigQuery drop off service account" iam = { "roles/iam.serviceAccountTokenCreator" = [local.groups_iam.data-engineers] } } -module "land-bq-0" { +module "drop-bq-0" { source = "../../../modules/bigquery-dataset" - project_id = module.land-project.project_id - id = "${replace(var.prefix, "-", "_")}lnd_bq_0" + project_id = module.drop-project.project_id + id = "${replace(var.prefix, "-", "_")}drp_bq_0" location = var.location encryption_key = try(local.service_encryption_keys.bq, null) } diff --git a/examples/data-solutions/data-platform-foundations/03-composer.tf b/examples/data-solutions/data-platform-foundations/03-composer.tf index fac47ec5..2622ffa2 100644 --- a/examples/data-solutions/data-platform-foundations/03-composer.tf +++ b/examples/data-solutions/data-platform-foundations/03-composer.tf @@ -66,39 +66,39 @@ resource "google_composer_environment" "orch-cmp-0" { image_version = var.composer_config.airflow_version env_variables = merge( var.composer_config.env_variables, { - BQ_LOCATION = var.location - DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}") - DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "") - DTL_L0_PRJ = module.lake-0-project.project_id - DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id - DTL_L0_GCS = module.lake-0-cs-0.url - DTL_L1_PRJ = module.lake-1-project.project_id - DTL_L1_BQ_DATASET = module.lake-1-bq-0.dataset_id - DTL_L1_GCS = module.lake-1-cs-0.url - DTL_L2_PRJ = module.lake-2-project.project_id - DTL_L2_BQ_DATASET = module.lake-2-bq-0.dataset_id - DTL_L2_GCS = module.lake-2-cs-0.url - DTL_PLG_PRJ = module.lake-plg-project.project_id - DTL_PLG_BQ_DATASET = module.lake-plg-bq-0.dataset_id - DTL_PLG_GCS = module.lake-plg-cs-0.url - GCP_REGION = var.region - LND_PRJ = module.land-project.project_id - LND_BQ = module.land-bq-0.dataset_id - LND_GCS = 
module.land-cs-0.url - LND_PS = module.land-ps-0.id - LOD_PRJ = module.load-project.project_id - LOD_GCS_STAGING = module.load-cs-df-0.url - LOD_NET_VPC = local.load_vpc - LOD_NET_SUBNET = local.load_subnet - LOD_SA_DF = module.load-sa-df-0.email - ORC_PRJ = module.orch-project.project_id - ORC_GCS = module.orch-cs-0.url - TRF_PRJ = module.transf-project.project_id - TRF_GCS_STAGING = module.transf-cs-df-0.url - TRF_NET_VPC = local.transf_vpc - TRF_NET_SUBNET = local.transf_subnet - TRF_SA_DF = module.transf-sa-df-0.email - TRF_SA_BQ = module.transf-sa-bq-0.email + BQ_LOCATION = var.location + DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}") + DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "") + DRP_PRJ = module.drop-project.project_id + DRP_BQ = module.drop-bq-0.dataset_id + DRP_GCS = module.drop-cs-0.url + DRP_PS = module.drop-ps-0.id + DWH_LAND_PRJ = module.dwh-lnd-project.project_id + DWH_LAND_BQ_DATASET = module.dwh-lnd-bq-0.dataset_id + DWH_LAND_GCS = module.dwh-lnd-cs-0.url + DWH_CURATED_PRJ = module.dwh-cur-project.project_id + DWH_CURATED_BQ_DATASET = module.dwh-cur-bq-0.dataset_id + DWH_CURATED_GCS = module.dwh-cur-cs-0.url + DWH_CONFIDENTIAL_PRJ = module.dwh-conf-project.project_id + DWH_CONFIDENTIAL_BQ_DATASET = module.dwh-conf-bq-0.dataset_id + DWH_CONFIDENTIAL_GCS = module.dwh-conf-cs-0.url + DWH_PLG_PRJ = module.dwh-plg-project.project_id + DWH_PLG_BQ_DATASET = module.dwh-plg-bq-0.dataset_id + DWH_PLG_GCS = module.dwh-plg-cs-0.url + GCP_REGION = var.region + LOD_PRJ = module.load-project.project_id + LOD_GCS_STAGING = module.load-cs-df-0.url + LOD_NET_VPC = local.load_vpc + LOD_NET_SUBNET = local.load_subnet + LOD_SA_DF = module.load-sa-df-0.email + ORC_PRJ = module.orch-project.project_id + ORC_GCS = module.orch-cs-0.url + TRF_PRJ = module.transf-project.project_id + TRF_GCS_STAGING = module.transf-cs-df-0.url + TRF_NET_VPC = local.transf_vpc + TRF_NET_SUBNET = local.transf_subnet + TRF_SA_DF = module.transf-sa-df-0.email + TRF_SA_BQ = module.transf-sa-bq-0.email } ) } diff --git a/examples/data-solutions/data-platform-foundations/05-datalake.tf b/examples/data-solutions/data-platform-foundations/05-datawarehouse.tf similarity index 74% rename from examples/data-solutions/data-platform-foundations/05-datalake.tf rename to examples/data-solutions/data-platform-foundations/05-datawarehouse.tf index b163f9e5..879a0e0b 100644 --- a/examples/data-solutions/data-platform-foundations/05-datalake.tf +++ b/examples/data-solutions/data-platform-foundations/05-datawarehouse.tf @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -# tfdoc:file:description Datalake projects. +# tfdoc:file:description Data Warehouse projects. 
locals { - lake_group_iam = { + dwh_group_iam = { (local.groups.data-engineers) = [ "roles/bigquery.dataEditor", "roles/storage.admin", @@ -30,7 +30,7 @@ locals { "roles/storage.objectViewer", ] } - lake_plg_group_iam = { + dwh_plg_group_iam = { (local.groups.data-engineers) = [ "roles/bigquery.dataEditor", "roles/storage.admin", @@ -45,7 +45,7 @@ locals { "roles/storage.objectAdmin", ] } - lake_0_iam = { + dwh_lnd_iam = { "roles/bigquery.dataOwner" = [ module.load-sa-df-0.iam_email, module.transf-sa-df-0.iam_email, @@ -61,7 +61,7 @@ locals { module.load-sa-df-0.iam_email, ] } - lake_iam = { + dwh_iam = { "roles/bigquery.dataOwner" = [ module.transf-sa-df-0.iam_email, module.transf-sa-bq-0.iam_email, @@ -79,7 +79,7 @@ locals { module.transf-sa-df-0.iam_email, ] } - lake_services = concat(var.project_services, [ + dwh_services = concat(var.project_services, [ "bigquery.googleapis.com", "bigqueryreservation.googleapis.com", "bigquerystorage.googleapis.com", @@ -95,60 +95,60 @@ locals { # Project -module "lake-0-project" { +module "dwh-lnd-project" { source = "../../../modules/project" parent = var.folder_id billing_account = var.billing_account_id prefix = var.prefix - name = "dtl-0${local.project_suffix}" - group_iam = local.lake_group_iam - iam = local.lake_0_iam - services = local.lake_services + name = "dwh-lnd${local.project_suffix}" + group_iam = local.dwh_group_iam + iam = local.dwh_lnd_iam + services = local.dwh_services service_encryption_key_ids = { bq = [try(local.service_encryption_keys.bq, null)] storage = [try(local.service_encryption_keys.storage, null)] } } -module "lake-1-project" { +module "dwh-cur-project" { source = "../../../modules/project" parent = var.folder_id billing_account = var.billing_account_id prefix = var.prefix - name = "dtl-1${local.project_suffix}" - group_iam = local.lake_group_iam - iam = local.lake_iam - services = local.lake_services + name = "dwh-cur${local.project_suffix}" + group_iam = local.dwh_group_iam + iam = local.dwh_iam + services = local.dwh_services service_encryption_key_ids = { bq = [try(local.service_encryption_keys.bq, null)] storage = [try(local.service_encryption_keys.storage, null)] } } -module "lake-2-project" { +module "dwh-conf-project" { source = "../../../modules/project" parent = var.folder_id billing_account = var.billing_account_id prefix = var.prefix - name = "dtl-2${local.project_suffix}" - group_iam = local.lake_group_iam - iam = local.lake_iam - services = local.lake_services + name = "dwh-conf${local.project_suffix}" + group_iam = local.dwh_group_iam + iam = local.dwh_iam + services = local.dwh_services service_encryption_key_ids = { bq = [try(local.service_encryption_keys.bq, null)] storage = [try(local.service_encryption_keys.storage, null)] } } -module "lake-plg-project" { +module "dwh-plg-project" { source = "../../../modules/project" parent = var.folder_id billing_account = var.billing_account_id prefix = var.prefix - name = "dtl-plg${local.project_suffix}" - group_iam = local.lake_plg_group_iam + name = "dwh-plg${local.project_suffix}" + group_iam = local.dwh_plg_group_iam iam = {} - services = local.lake_services + services = local.dwh_services service_encryption_key_ids = { bq = [try(local.service_encryption_keys.bq, null)] storage = [try(local.service_encryption_keys.storage, null)] @@ -157,78 +157,78 @@ module "lake-plg-project" { # Bigquery -module "lake-0-bq-0" { +module "dwh-lnd-bq-0" { source = "../../../modules/bigquery-dataset" - project_id = module.lake-0-project.project_id - id = "${replace(var.prefix, 
"-", "_")}_dtl_0_bq_0" + project_id = module.dwh-lnd-project.project_id + id = "${replace(var.prefix, "-", "_")}_dwh_lnd_bq_0" location = var.location encryption_key = try(local.service_encryption_keys.bq, null) } -module "lake-1-bq-0" { +module "dwh-cur-bq-0" { source = "../../../modules/bigquery-dataset" - project_id = module.lake-1-project.project_id - id = "${replace(var.prefix, "-", "_")}_dtl_1_bq_0" + project_id = module.dwh-cur-project.project_id + id = "${replace(var.prefix, "-", "_")}_dwh_lnd_bq_0" location = var.location encryption_key = try(local.service_encryption_keys.bq, null) } -module "lake-2-bq-0" { +module "dwh-conf-bq-0" { source = "../../../modules/bigquery-dataset" - project_id = module.lake-2-project.project_id - id = "${replace(var.prefix, "-", "_")}_dtl_2_bq_0" + project_id = module.dwh-conf-project.project_id + id = "${replace(var.prefix, "-", "_")}_dwh_conf_bq_0" location = var.location encryption_key = try(local.service_encryption_keys.bq, null) } -module "lake-plg-bq-0" { +module "dwh-plg-bq-0" { source = "../../../modules/bigquery-dataset" - project_id = module.lake-plg-project.project_id - id = "${replace(var.prefix, "-", "_")}_dtl_plg_bq_0" + project_id = module.dwh-plg-project.project_id + id = "${replace(var.prefix, "-", "_")}_dwh_plg_bq_0" location = var.location encryption_key = try(local.service_encryption_keys.bq, null) } # Cloud storage -module "lake-0-cs-0" { +module "dwh-lnd-cs-0" { source = "../../../modules/gcs" - project_id = module.lake-0-project.project_id + project_id = module.dwh-lnd-project.project_id prefix = var.prefix - name = "dtl-0-cs-0" + name = "dwh-lnd-cs-0" location = var.location storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy } -module "lake-1-cs-0" { +module "dwh-cur-cs-0" { source = "../../../modules/gcs" - project_id = module.lake-1-project.project_id + project_id = module.dwh-cur-project.project_id prefix = var.prefix - name = "dtl-1-cs-0" + name = "dwh-cur-cs-0" location = var.location storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy } -module "lake-2-cs-0" { +module "dwh-conf-cs-0" { source = "../../../modules/gcs" - project_id = module.lake-2-project.project_id + project_id = module.dwh-conf-project.project_id prefix = var.prefix - name = "dtl-2-cs-0" + name = "dwh-conf-cs-0" location = var.location storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy } -module "lake-plg-cs-0" { +module "dwh-plg-cs-0" { source = "../../../modules/gcs" - project_id = module.lake-plg-project.project_id + project_id = module.dwh-plg-project.project_id prefix = var.prefix - name = "dtl-plg-cs-0" + name = "dwh-plg-cs-0" location = var.location storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) diff --git a/examples/data-solutions/data-platform-foundations/IAM.md b/examples/data-solutions/data-platform-foundations/IAM.md index d6ccbecb..54d35939 100644 --- a/examples/data-solutions/data-platform-foundations/IAM.md +++ b/examples/data-solutions/data-platform-foundations/IAM.md @@ -13,7 +13,40 @@ Legend: + additive, conditional. |trf-bq-0
serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) | |trf-df-0
serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | -## Project dtl-0 +## Project drp + +| members | roles | +|---|---| +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/pubsub.editor](https://cloud.google.com/iam/docs/understanding-roles#pubsub.editor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|drp-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | +|drp-cs-0
serviceAccount|[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | +|drp-ps-0
serviceAccount|[roles/pubsub.publisher](https://cloud.google.com/iam/docs/understanding-roles#pubsub.publisher) | +|load-df-0
serviceAccount|[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|orc-cmp-0
serviceAccount|[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | + +## Project dwh-conf + +| members | roles | +|---|---| +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| +|load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | +|trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | +|trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | + +## Project dwh-cur + +| members | roles | +|---|---| +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| +|load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | +|trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | +|trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | + +## Project dwh-lnd | members | roles | |---|---| @@ -24,29 +57,7 @@ Legend: + additive, conditional. |trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | |trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) | -## Project dtl-1 - -| members | roles | -|---|---| -|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | -|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| -|load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | -|trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | -|trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | - -## Project dtl-2 - -| members | roles | -|---|---| -|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | -|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| -|load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | -|trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | -|trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | - -## Project dtl-plg +## Project dwh-plg | members | roles | |---|---| @@ -54,17 +65,6 @@ Legend: + additive, conditional. |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | |SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| -## Project lnd - -| members | roles | -|---|---| -|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/pubsub.editor](https://cloud.google.com/iam/docs/understanding-roles#pubsub.editor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|lnd-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | -|lnd-cs-0
serviceAccount|[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | -|lnd-ps-0
serviceAccount|[roles/pubsub.publisher](https://cloud.google.com/iam/docs/understanding-roles#pubsub.publisher) | -|load-df-0
serviceAccount|[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | -|orc-cmp-0
serviceAccount|[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | - ## Project lod | members | roles | diff --git a/examples/data-solutions/data-platform-foundations/README.md b/examples/data-solutions/data-platform-foundations/README.md index 09f8e63a..2ac1a987 100644 --- a/examples/data-solutions/data-platform-foundations/README.md +++ b/examples/data-solutions/data-platform-foundations/README.md @@ -27,9 +27,9 @@ The code in this example doesn't address Organization-level configurations (Orga The Data Platform is designed to rely on several projects, one project per data stage. The stages identified are: -- landing +- drop off - load -- data lake +- data warehouse - orchestration - transformation - exposure @@ -38,15 +38,15 @@ This separation into projects allows adhering to the least-privilege principle b The script will create the following projects: -- **Landing** Used to store temporary data. Data is pushed to Cloud Storage, BigQuery, or Cloud PubSub. Resources are configured with a customizable lifecycle policy. -- **Load** Used to load data from landing to data lake. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended. -- **Data Lake** Several projects distributed across 3 separate layers, to host progressively processed and refined data: - - **L0 - Raw data** Structured Data, stored in relevant formats: structured data stored in BigQuery, unstructured data stored on Cloud Storage with additional metadata stored in BigQuery (for example pictures stored in Cloud Storage and analysis of the images for Cloud Vision API stored in BigQuery). - - **L1 - Cleansed, aggregated and standardized data** - - **L2 - Curated layer** - - **Playground** Temporary tables that Data Analyst may use to perform R&D on data available in other Data Lake layers. +- **Drop off** Used to store temporary data. Data is pushed to Cloud Storage, BigQuery, or Cloud PubSub. Resources are configured with a customizable lifecycle policy. +- **Load** Used to load data from the drop off zone to the data warehouse. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended. +- **Data Warehouse** Several projects distributed across 3 separate layers, to host progressively processed and refined data: + - **Landing - Raw data** Structured Data, stored in relevant formats: structured data stored in BigQuery, unstructured data stored on Cloud Storage with additional metadata stored in BigQuery (for example pictures stored in Cloud Storage and analysis of the images for Cloud Vision API stored in BigQuery). + - **Curated - Cleansed, aggregated and curated data** + - **Confidential - Curated and unencrypted layer** + - **Playground** Temporary tables that Data Analyst may use to perform R&D on data available in other Data Warehouse layers. - **Orchestration** Used to host Cloud Composer, which orchestrates all tasks that move data across layers. 
-- **Transformation** Used to move data between Data Lake layers. We strongly suggest relying on BigQuery Engine to perform the transformations. If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII. +- **Transformation** Used to move data between Data Warehouse layers. We strongly suggest relying on BigQuery Engine to perform the transformations. If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII. - **Exposure** Used to host resources that share processed data with external systems. Depending on the access pattern, data can be presented via Cloud SQL, BigQuery, or Bigtable. For BigQuery data, we strongly suggest relying on [Authorized views](https://cloud.google.com/bigquery/docs/authorized-views). ### Roles @@ -57,9 +57,9 @@ We assign roles on resources at the project level, granting the appropriate role Service account creation follows the least privilege principle, performing a single task which requires access to a defined set of resources. The table below shows a high level overview of roles for each service account on each data layer, using `READ` or `WRITE` access patterns for simplicity. For detailed roles please refer to the code. -|Service Account|Landing|DataLake L0|DataLake L1|DataLake L2| +|Service Account|Drop off|DWH Landing|DWH Curated|DWH Confidential| |-|:-:|:-:|:-:|:-:| -|`landing-sa`|`WRITE`|-|-|-| +|`drop-sa`|`WRITE`|-|-|-| |`load-sa`|`READ`|`READ`/`WRITE`|-|-| |`transformation-sa`|-|`READ`/`WRITE`|`READ`/`WRITE`|`READ`/`WRITE`| |`orchestration-sa`|-|-|-|-| @@ -75,12 +75,12 @@ User groups provide a stable frame of reference that allows decoupling the final We use three groups to control access to resources: - *Data Engineers* They handle and run the Data Hub, with read access to all resources in order to troubleshoot possible issues with pipelines. This team can also impersonate any service account. -- *Data Analysts*. They perform analysis on datasets, with read access to the data lake L2 project, and BigQuery READ/WRITE access to the playground project. +- *Data Analysts*. They perform analysis on datasets, with read access to the Data Warehouse Confidential project, and BigQuery READ/WRITE access to the playground project. - *Data Security*:. They handle security configurations related to the Data Hub. This team has admin access to the common project to configure Cloud DLP templates or Data Catalog policy tags. The table below shows a high level overview of roles for each group on each project, using `READ`, `WRITE` and `ADMIN` access patterns for simplicity. For detailed roles please refer to the code. 
-|Group|Landing|Load|Transformation|Data Lake L0|Data Lake L1|Data Lake L2|Data Lake Playground|Orchestration|Common| +|Group|Drop off|Load|Transformation|DWH Landing|DWH Curated|DWH Confidential|DWH Playground|Orchestration|Common| |-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| |Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`| |Data Analysts|-|-|-|-|-|`READ`|`READ`/`WRITE`|-|-| @@ -215,12 +215,12 @@ To create Cloud Key Management keys in the Data Platform you can uncomment the C ### Assign roles at BQ Dataset level -To handle multiple groups of `data-analysts` accessing the same Data Lake layer projects but only to the dataset belonging to a specific group, you may want to assign roles at BigQuery dataset level instead of at project-level. +To handle multiple groups of `data-analysts` accessing the same Data Warehouse layer projects but only the datasets belonging to their specific group, you may want to assign roles at BigQuery dataset level instead of at project-level. To do this, you need to remove IAM binging at project-level for the `data-analysts` group and give roles at BigQuery dataset level using the `iam` variable on `bigquery-dataset` modules. ## Demo pipeline -The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features. +The application layer is out of scope of this script. For demo purposes only, several Cloud Composer DAGs are provided. Demos will import data from the `drop off` area to the `Data Warehouse Confidential` dataset using different features. You can find examples in the `[demo](./demo)` folder. diff --git a/examples/data-solutions/data-platform-foundations/demo/README.md b/examples/data-solutions/data-platform-foundations/demo/README.md index 5347b2cf..13f750a3 100644 --- a/examples/data-solutions/data-platform-foundations/demo/README.md +++ b/examples/data-solutions/data-platform-foundations/demo/README.md @@ -8,7 +8,7 @@ The example is not intended to be a production-ready code. The demo imports purchase data generated by a store. ## Input files -Data are uploaded to the `landing` GCS bucket. File structure: +Data are uploaded to the `drop off` GCS bucket. File structure: - `customers.csv`: Comma separate value with customer information in the following format: Customer ID, Name, Surname, Registration Timestamp - `purchases.csv`: Comma separate value with customer information in the following format: Item ID, Customer ID, Item, Item price, Purchase Timestamp @@ -16,14 +16,14 @@ Data are uploaded to the `landing` GCS bucket. File structure: Different data pipelines are provided to highlight different features and patterns. For the purpose of the example, a single pipeline handle all data lifecycles. When adapting them to your real use case, you may want to evaluate the option to handle each functional step on a separate pipeline or a dedicated tool. For example, you may want to use `Dataform` to handle data schemas lifecycle. Below you can find a description of each example: - - Simple import data: [`datapipeline.py`](./datapipeline.py) is a simple pipeline to import provided data from the `landing` Google Cloud Storage bucket to the Data Hub L2 layer joining `customers` and `purchases` tables into `customerpurchase` table. 
- - Import data with Policy Tags: [`datapipeline_dc_tags.py`](./datapipeline.py) imports provided data from `landing` bucket to the Data Hub L2 layer protecting sensitive data using Data Catalog policy Tags. + - Simple import data: [`datapipeline.py`](./datapipeline.py) is a simple pipeline to import provided data from the `drop off` Google Cloud Storage bucket to the Data Hub Confidential layer joining `customers` and `purchases` tables into `customerpurchase` table. + - Import data with Policy Tags: [`datapipeline_dc_tags.py`](./datapipeline.py) imports provided data from `drop off` bucket to the Data Hub Confidential layer protecting sensitive data using Data Catalog policy Tags. - Delete tables: [`delete_table.py`](./delete_table.py) deletes BigQuery tables created by import pipelines. ## Runnin the demo To run demo examples, please follow the following steps: -- 01: copy sample data to the `landing` Cloud Storage bucket impersonating the `load` service account. +- 01: copy sample data to the `drop off` Cloud Storage bucket impersonating the `load` service account. - 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account. - 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account. - 04: Open the Cloud Composer Airflow UI and run the imported DAG. diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py index 1f748c08..a682d346 100644 --- a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py +++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py @@ -34,23 +34,23 @@ from airflow.utils.task_group import TaskGroup # -------------------------------------------------------------------------------- BQ_LOCATION = os.environ.get("BQ_LOCATION") DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS")) -DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ") -DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET") -DTL_L0_GCS = os.environ.get("DTL_L0_GCS") -DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ") -DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET") -DTL_L1_GCS = os.environ.get("DTL_L1_GCS") -DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ") -DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET") -DTL_L2_GCS = os.environ.get("DTL_L2_GCS") -DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ") -DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET") -DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS") +DWH_LAND_PRJ = os.environ.get("DWH_LAND_PRJ") +DWH_LAND_BQ_DATASET = os.environ.get("DWH_LAND_BQ_DATASET") +DWH_LAND_GCS = os.environ.get("DWH_LAND_GCS") +DWH_CURATED_PRJ = os.environ.get("DWH_CURATED_PRJ") +DWH_CURATED_BQ_DATASET = os.environ.get("DWH_CURATED_BQ_DATASET") +DWH_CURATED_GCS = os.environ.get("DWH_CURATED_GCS") +DWH_CONFIDENTIAL_PRJ = os.environ.get("DWH_CONFIDENTIAL_PRJ") +DWH_CONFIDENTIAL_BQ_DATASET = os.environ.get("DWH_CONFIDENTIAL_BQ_DATASET") +DWH_CONFIDENTIAL_GCS = os.environ.get("DWH_CONFIDENTIAL_GCS") +DWH_PLG_PRJ = os.environ.get("DWH_PLG_PRJ") +DWH_PLG_BQ_DATASET = os.environ.get("DWH_PLG_BQ_DATASET") +DWH_PLG_GCS = os.environ.get("DWH_PLG_GCS") GCP_REGION = os.environ.get("GCP_REGION") -LND_PRJ = os.environ.get("LND_PRJ") -LND_BQ = os.environ.get("LND_BQ") -LND_GCS = os.environ.get("LND_GCS") -LND_PS = os.environ.get("LND_PS") +DRP_PRJ = os.environ.get("DRP_PRJ") +DRP_BQ = os.environ.get("DRP_BQ") +DRP_GCS = 
os.environ.get("DRP_GCS") +DRP_PS = os.environ.get("DRP_PS") LOD_PRJ = os.environ.get("LOD_PRJ") LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING") LOD_NET_VPC = os.environ.get("LOD_NET_VPC") @@ -127,8 +127,8 @@ with models.DAG( "javascriptTextTransformFunctionName": "transform", "JSONPath": ORC_GCS + "/customers_schema.json", "javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js", - "inputFilePattern": LND_GCS + "/customers.csv", - "outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".customers", + "inputFilePattern": DRP_GCS + "/customers.csv", + "outputTable": DWH_LAND_PRJ + ":" + DWH_LAND_BQ_DATASET + ".customers", "bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/", }, ) @@ -142,8 +142,8 @@ with models.DAG( "javascriptTextTransformFunctionName": "transform", "JSONPath": ORC_GCS + "/purchases_schema.json", "javascriptTextTransformGcsPath": ORC_GCS + "/purchases_udf.js", - "inputFilePattern": LND_GCS + "/purchases.csv", - "outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".purchases", + "inputFilePattern": DRP_GCS + "/purchases.csv", + "outputTable": DWH_LAND_PRJ + ":" + DWH_LAND_BQ_DATASET + ".purchases", "bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/", }, ) @@ -159,17 +159,15 @@ with models.DAG( 'query':"""SELECT c.id as customer_id, p.id as purchase_id, - c.name as name, - c.surname as surname, p.item as item, p.price as price, p.timestamp as timestamp - FROM `{dtl_0_prj}.{dtl_0_dataset}.customers` c - JOIN `{dtl_0_prj}.{dtl_0_dataset}.purchases` p ON c.id = p.customer_id - """.format(dtl_0_prj=DTL_L0_PRJ, dtl_0_dataset=DTL_L0_BQ_DATASET, ), + FROM `{dwh_0_prj}.{dwh_0_dataset}.customers` c + JOIN `{dwh_0_prj}.{dwh_0_dataset}.purchases` p ON c.id = p.customer_id + """.format(dwh_0_prj=DWH_LAND_PRJ, dwh_0_dataset=DWH_LAND_BQ_DATASET, ), 'destinationTable':{ - 'projectId': DTL_L1_PRJ, - 'datasetId': DTL_L1_BQ_DATASET, + 'projectId': DWH_CURATED_PRJ, + 'datasetId': DWH_CURATED_BQ_DATASET, 'tableId': 'customer_purchase' }, 'writeDisposition':'WRITE_TRUNCATE', @@ -179,8 +177,8 @@ with models.DAG( impersonation_chain=[TRF_SA_BQ] ) - l2_customer_purchase = BigQueryInsertJobOperator( - task_id='bq_l2_customer_purchase', + confidential_customer_purchase = BigQueryInsertJobOperator( + task_id='bq_confidential_customer_purchase', gcp_conn_id='bigquery_default', project_id=TRF_PRJ, location=BQ_LOCATION, @@ -188,18 +186,19 @@ with models.DAG( 'jobType':'QUERY', 'query':{ 'query':"""SELECT - customer_id, - purchase_id, - name, - surname, - item, - price, - timestamp - FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase` - """.format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ), + c.id as customer_id, + p.id as purchase_id, + c.name as name, + c.surname as surname, + p.item as item, + p.price as price, + p.timestamp as timestamp + FROM `{dwh_0_prj}.{dwh_0_dataset}.customers` c + JOIN `{dwh_0_prj}.{dwh_0_dataset}.purchases` p ON c.id = p.customer_id + """.format(dwh_0_prj=DWH_LAND_PRJ, dwh_0_dataset=DWH_LAND_BQ_DATASET, ), 'destinationTable':{ - 'projectId': DTL_L2_PRJ, - 'datasetId': DTL_L2_BQ_DATASET, + 'projectId': DWH_CONFIDENTIAL_PRJ, + 'datasetId': DWH_CONFIDENTIAL_BQ_DATASET, 'tableId': 'customer_purchase' }, 'writeDisposition':'WRITE_TRUNCATE', @@ -209,4 +208,4 @@ with models.DAG( impersonation_chain=[TRF_SA_BQ] ) - start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end \ No newline at end of file + start >> [customers_import, purchases_import] >> join_customer_purchase >> 
confidential_customer_purchase >> end \ No newline at end of file diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py index 2fb88c9e..4b15eaab 100644 --- a/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py +++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py @@ -34,23 +34,23 @@ from airflow.utils.task_group import TaskGroup # -------------------------------------------------------------------------------- BQ_LOCATION = os.environ.get("BQ_LOCATION") DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS")) -DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ") -DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET") -DTL_L0_GCS = os.environ.get("DTL_L0_GCS") -DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ") -DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET") -DTL_L1_GCS = os.environ.get("DTL_L1_GCS") -DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ") -DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET") -DTL_L2_GCS = os.environ.get("DTL_L2_GCS") -DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ") -DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET") -DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS") +DWH_LAND_PRJ = os.environ.get("DWH_LAND_PRJ") +DWH_LAND_BQ_DATASET = os.environ.get("DWH_LAND_BQ_DATASET") +DWH_LAND_GCS = os.environ.get("DWH_LAND_GCS") +DWH_CURATED_PRJ = os.environ.get("DWH_CURATED_PRJ") +DWH_CURATED_BQ_DATASET = os.environ.get("DWH_CURATED_BQ_DATASET") +DWH_CURATED_GCS = os.environ.get("DWH_CURATED_GCS") +DWH_CONFIDENTIAL_PRJ = os.environ.get("DWH_CONFIDENTIAL_PRJ") +DWH_CONFIDENTIAL_BQ_DATASET = os.environ.get("DWH_CONFIDENTIAL_BQ_DATASET") +DWH_CONFIDENTIAL_GCS = os.environ.get("DWH_CONFIDENTIAL_GCS") +DWH_PLG_PRJ = os.environ.get("DWH_PLG_PRJ") +DWH_PLG_BQ_DATASET = os.environ.get("DWH_PLG_BQ_DATASET") +DWH_PLG_GCS = os.environ.get("DWH_PLG_GCS") GCP_REGION = os.environ.get("GCP_REGION") -LND_PRJ = os.environ.get("LND_PRJ") -LND_BQ = os.environ.get("LND_BQ") -LND_GCS = os.environ.get("LND_GCS") -LND_PS = os.environ.get("LND_PS") +DRP_PRJ = os.environ.get("DRP_PRJ") +DRP_BQ = os.environ.get("DRP_BQ") +DRP_GCS = os.environ.get("DRP_GCS") +DRP_PS = os.environ.get("DRP_PS") LOD_PRJ = os.environ.get("LOD_PRJ") LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING") LOD_NET_VPC = os.environ.get("LOD_NET_VPC") @@ -121,8 +121,8 @@ with models.DAG( with TaskGroup('upsert_table') as upsert_table: upsert_table_customers = BigQueryUpsertTableOperator( task_id="upsert_table_customers", - project_id=DTL_L0_PRJ, - dataset_id=DTL_L0_BQ_DATASET, + project_id=DWH_LAND_PRJ, + dataset_id=DWH_LAND_BQ_DATASET, impersonation_chain=[TRF_SA_DF], table_resource={ "tableReference": {"tableId": "customers"}, @@ -131,28 +131,28 @@ with models.DAG( upsert_table_purchases = BigQueryUpsertTableOperator( task_id="upsert_table_purchases", - project_id=DTL_L0_PRJ, - dataset_id=DTL_L0_BQ_DATASET, + project_id=DWH_LAND_PRJ, + dataset_id=DWH_LAND_BQ_DATASET, impersonation_chain=[TRF_SA_BQ], table_resource={ "tableReference": {"tableId": "purchases"} }, ) - upsert_table_customer_purchase_l1 = BigQueryUpsertTableOperator( - task_id="upsert_table_customer_purchase_l1", - project_id=DTL_L1_PRJ, - dataset_id=DTL_L1_BQ_DATASET, + upsert_table_customer_purchase_curated = BigQueryUpsertTableOperator( + task_id="upsert_table_customer_purchase_curated", + project_id=DWH_CURATED_PRJ, + dataset_id=DWH_CURATED_BQ_DATASET, impersonation_chain=[TRF_SA_BQ], table_resource={ 
"tableReference": {"tableId": "customer_purchase"} }, ) - upsert_table_customer_purchase_l2 = BigQueryUpsertTableOperator( - task_id="upsert_table_customer_purchase_l2", - project_id=DTL_L2_PRJ, - dataset_id=DTL_L2_BQ_DATASET, + upsert_table_customer_purchase_confidential = BigQueryUpsertTableOperator( + task_id="upsert_table_customer_purchase_confidential", + project_id=DWH_CONFIDENTIAL_PRJ, + dataset_id=DWH_CONFIDENTIAL_BQ_DATASET, impersonation_chain=[TRF_SA_BQ], table_resource={ "tableReference": {"tableId": "customer_purchase"} @@ -164,8 +164,8 @@ with models.DAG( with TaskGroup('update_schema_table') as update_schema_table: update_table_schema_customers = BigQueryUpdateTableSchemaOperator( task_id="update_table_schema_customers", - project_id=DTL_L0_PRJ, - dataset_id=DTL_L0_BQ_DATASET, + project_id=DWH_LAND_PRJ, + dataset_id=DWH_LAND_BQ_DATASET, table_id="customers", impersonation_chain=[TRF_SA_BQ], include_policy_tags=True, @@ -179,8 +179,8 @@ with models.DAG( update_table_schema_customers = BigQueryUpdateTableSchemaOperator( task_id="update_table_schema_purchases", - project_id=DTL_L0_PRJ, - dataset_id=DTL_L0_BQ_DATASET, + project_id=DWH_LAND_PRJ, + dataset_id=DWH_LAND_BQ_DATASET, table_id="purchases", impersonation_chain=[TRF_SA_BQ], include_policy_tags=True, @@ -193,10 +193,10 @@ with models.DAG( ] ) - update_table_schema_customer_purchase_l1 = BigQueryUpdateTableSchemaOperator( - task_id="update_table_schema_customer_purchase_l1", - project_id=DTL_L1_PRJ, - dataset_id=DTL_L1_BQ_DATASET, + update_table_schema_customer_purchase_curated = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_customer_purchase_curated", + project_id=DWH_CURATED_PRJ, + dataset_id=DWH_CURATED_BQ_DATASET, table_id="customer_purchase", impersonation_chain=[TRF_SA_BQ], include_policy_tags=True, @@ -211,10 +211,10 @@ with models.DAG( ] ) - update_table_schema_customer_purchase_l2 = BigQueryUpdateTableSchemaOperator( - task_id="update_table_schema_customer_purchase_l2", - project_id=DTL_L2_PRJ, - dataset_id=DTL_L2_BQ_DATASET, + update_table_schema_customer_purchase_confidential = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_customer_purchase_confidential", + project_id=DWH_CONFIDENTIAL_PRJ, + dataset_id=DWH_CONFIDENTIAL_BQ_DATASET, table_id="customer_purchase", impersonation_chain=[TRF_SA_BQ], include_policy_tags=True, @@ -238,8 +238,8 @@ with models.DAG( "javascriptTextTransformFunctionName": "transform", "JSONPath": ORC_GCS + "/customers_schema.json", "javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js", - "inputFilePattern": LND_GCS + "/customers.csv", - "outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".customers", + "inputFilePattern": DRP_GCS + "/customers.csv", + "outputTable": DWH_LAND_PRJ + ":" + DWH_LAND_BQ_DATASET + ".customers", "bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/", }, ) @@ -253,8 +253,8 @@ with models.DAG( "javascriptTextTransformFunctionName": "transform", "JSONPath": ORC_GCS + "/purchases_schema.json", "javascriptTextTransformGcsPath": ORC_GCS + "/purchases_udf.js", - "inputFilePattern": LND_GCS + "/purchases.csv", - "outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".purchases", + "inputFilePattern": DRP_GCS + "/purchases.csv", + "outputTable": DWH_LAND_PRJ + ":" + DWH_LAND_BQ_DATASET + ".purchases", "bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/", }, ) @@ -275,12 +275,12 @@ with models.DAG( p.item as item, p.price as price, p.timestamp as timestamp - FROM `{dtl_0_prj}.{dtl_0_dataset}.customers` c - 
JOIN `{dtl_0_prj}.{dtl_0_dataset}.purchases` p ON c.id = p.customer_id - """.format(dtl_0_prj=DTL_L0_PRJ, dtl_0_dataset=DTL_L0_BQ_DATASET, ), + FROM `{dwh_0_prj}.{dwh_0_dataset}.customers` c + JOIN `{dwh_0_prj}.{dwh_0_dataset}.purchases` p ON c.id = p.customer_id + """.format(dwh_0_prj=DWH_LAND_PRJ, dwh_0_dataset=DWH_LAND_BQ_DATASET, ), 'destinationTable':{ - 'projectId': DTL_L1_PRJ, - 'datasetId': DTL_L1_BQ_DATASET, + 'projectId': DWH_CURATED_PRJ, + 'datasetId': DWH_CURATED_BQ_DATASET, 'tableId': 'customer_purchase' }, 'writeDisposition':'WRITE_TRUNCATE', @@ -290,8 +290,8 @@ with models.DAG( impersonation_chain=[TRF_SA_BQ] ) - l2_customer_purchase = BigQueryInsertJobOperator( - task_id='bq_l2_customer_purchase', + confidential_customer_purchase = BigQueryInsertJobOperator( + task_id='bq_confidential_customer_purchase', gcp_conn_id='bigquery_default', project_id=TRF_PRJ, location=BQ_LOCATION, @@ -306,11 +306,11 @@ with models.DAG( item, price, timestamp - FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase` - """.format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ), + FROM `{dwh_cur_prj}.{dwh_cur_dataset}.customer_purchase` + """.format(dwh_cur_prj=DWH_CURATED_PRJ, dwh_cur_dataset=DWH_CURATED_BQ_DATASET, ), 'destinationTable':{ - 'projectId': DTL_L2_PRJ, - 'datasetId': DTL_L2_BQ_DATASET, + 'projectId': DWH_CONFIDENTIAL_PRJ, + 'datasetId': DWH_CONFIDENTIAL_BQ_DATASET, 'tableId': 'customer_purchase' }, 'writeDisposition':'WRITE_TRUNCATE', @@ -319,4 +319,4 @@ with models.DAG( }, impersonation_chain=[TRF_SA_BQ] ) - start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end + start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> confidential_customer_purchase >> end diff --git a/examples/data-solutions/data-platform-foundations/demo/delete_table.py b/examples/data-solutions/data-platform-foundations/demo/delete_table.py index a2585a68..dc0c954b 100644 --- a/examples/data-solutions/data-platform-foundations/demo/delete_table.py +++ b/examples/data-solutions/data-platform-foundations/demo/delete_table.py @@ -34,23 +34,23 @@ from airflow.utils.task_group import TaskGroup # -------------------------------------------------------------------------------- BQ_LOCATION = os.environ.get("BQ_LOCATION") DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS")) -DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ") -DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET") -DTL_L0_GCS = os.environ.get("DTL_L0_GCS") -DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ") -DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET") -DTL_L1_GCS = os.environ.get("DTL_L1_GCS") -DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ") -DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET") -DTL_L2_GCS = os.environ.get("DTL_L2_GCS") -DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ") -DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET") -DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS") +DWH_LAND_PRJ = os.environ.get("DWH_LAND_PRJ") +DWH_LAND_BQ_DATASET = os.environ.get("DWH_LAND_BQ_DATASET") +DWH_LAND_GCS = os.environ.get("DWH_LAND_GCS") +DWH_CURATED_PRJ = os.environ.get("DWH_CURATED_PRJ") +DWH_CURATED_BQ_DATASET = os.environ.get("DWH_CURATED_BQ_DATASET") +DWH_CURATED_GCS = os.environ.get("DWH_CURATED_GCS") +DWH_CONFIDENTIAL_PRJ = os.environ.get("DWH_CONFIDENTIAL_PRJ") +DWH_CONFIDENTIAL_BQ_DATASET = os.environ.get("DWH_CONFIDENTIAL_BQ_DATASET") +DWH_CONFIDENTIAL_GCS = os.environ.get("DWH_CONFIDENTIAL_GCS") 
+DWH_PLG_PRJ = os.environ.get("DWH_PLG_PRJ") +DWH_PLG_BQ_DATASET = os.environ.get("DWH_PLG_BQ_DATASET") +DWH_PLG_GCS = os.environ.get("DWH_PLG_GCS") GCP_REGION = os.environ.get("GCP_REGION") -LND_PRJ = os.environ.get("LND_PRJ") -LND_BQ = os.environ.get("LND_BQ") -LND_GCS = os.environ.get("LND_GCS") -LND_PS = os.environ.get("LND_PS") +DRP_PRJ = os.environ.get("DRP_PRJ") +DRP_BQ = os.environ.get("DRP_BQ") +DRP_GCS = os.environ.get("DRP_GCS") +DRP_PS = os.environ.get("DRP_PS") LOD_PRJ = os.environ.get("LOD_PRJ") LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING") LOD_NET_VPC = os.environ.get("LOD_NET_VPC") @@ -121,25 +121,25 @@ with models.DAG( with TaskGroup('delete_table') as delte_table: delete_table_customers = BigQueryDeleteTableOperator( task_id="delete_table_customers", - deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".customers", + deletion_dataset_table=DWH_LAND_PRJ+"."+DWH_LAND_BQ_DATASET+".customers", impersonation_chain=[TRF_SA_DF] ) delete_table_purchases = BigQueryDeleteTableOperator( task_id="delete_table_purchases", - deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".purchases", + deletion_dataset_table=DWH_LAND_PRJ+"."+DWH_LAND_BQ_DATASET+".purchases", impersonation_chain=[TRF_SA_DF] ) - delete_table_customer_purchase_l1 = BigQueryDeleteTableOperator( - task_id="delete_table_customer_purchase_l1", - deletion_dataset_table=DTL_L1_PRJ+"."+DTL_L1_BQ_DATASET+".customer_purchase", + delete_table_customer_purchase_curated = BigQueryDeleteTableOperator( + task_id="delete_table_customer_purchase_curated", + deletion_dataset_table=DWH_CURATED_PRJ+"."+DWH_CURATED_BQ_DATASET+".customer_purchase", impersonation_chain=[TRF_SA_DF] ) - delete_table_customer_purchase_l2 = BigQueryDeleteTableOperator( - task_id="delete_table_customer_purchase_l2", - deletion_dataset_table=DTL_L2_PRJ+"."+DTL_L2_BQ_DATASET+".customer_purchase", + delete_table_customer_purchase_confidential = BigQueryDeleteTableOperator( + task_id="delete_table_customer_purchase_confidential", + deletion_dataset_table=DWH_CONFIDENTIAL_PRJ+"."+DWH_CONFIDENTIAL_BQ_DATASET+".customer_purchase", impersonation_chain=[TRF_SA_DF] ) diff --git a/examples/data-solutions/data-platform-foundations/images/overview_diagram.png b/examples/data-solutions/data-platform-foundations/images/overview_diagram.png index b1a0f788..642c81c2 100644 Binary files a/examples/data-solutions/data-platform-foundations/images/overview_diagram.png and b/examples/data-solutions/data-platform-foundations/images/overview_diagram.png differ diff --git a/examples/data-solutions/data-platform-foundations/outputs.tf b/examples/data-solutions/data-platform-foundations/outputs.tf index 32e98fc6..3fd81a0e 100644 --- a/examples/data-solutions/data-platform-foundations/outputs.tf +++ b/examples/data-solutions/data-platform-foundations/outputs.tf @@ -17,25 +17,25 @@ output "bigquery-datasets" { description = "BigQuery datasets." value = { - land-bq-0 = module.land-bq-0.dataset_id, - lake-0-bq-0 = module.lake-0-bq-0.dataset_id, - lake-1-bq-0 = module.lake-1-bq-0.dataset_id, - lake-2-bq-0 = module.lake-2-bq-0.dataset_id, - lake-plg-bq-0 = module.lake-plg-bq-0.dataset_id, + drop-bq-0 = module.drop-bq-0.dataset_id, + dwh-landing-bq-0 = module.dwh-lnd-bq-0.dataset_id, + dwh-curated-bq-0 = module.dwh-cur-bq-0.dataset_id, + dwh-confidential-bq-0 = module.dwh-conf-bq-0.dataset_id, + dwh-plg-bq-0 = module.dwh-plg-bq-0.dataset_id, } } output "gcs-buckets" { description = "GCS buckets." 
value = { - lake-0-cs-0 = module.lake-0-cs-0.name, - lake-1-cs-0 = module.lake-1-cs-0.name, - lake-2-cs-0 = module.lake-2-cs-0.name, - lake-plg-cs-0 = module.lake-plg-cs-0.name, - land-cs-0 = module.land-cs-0.name, - lod-cs-df = module.load-cs-df-0.name, - orch-cs-0 = module.orch-cs-0.name, - transf-cs-df = module.transf-cs-df-0.name, + dwh-landing-cs-0 = module.dwh-lnd-cs-0.name, + dwh-curated-cs-0 = module.dwh-cur-cs-0.name, + dwh-confidential-cs-0 = module.dwh-conf-cs-0.name, + dwh-plg-cs-0 = module.dwh-plg-cs-0.name, + drop-cs-0 = module.drop-cs-0.name, + lod-cs-df = module.load-cs-df-0.name, + orch-cs-0 = module.orch-cs-0.name, + transf-cs-df = module.transf-cs-df-0.name, } } @@ -48,26 +48,26 @@ output "projects" { description = "GCP Projects informations." value = { project_number = { - lake-0 = module.lake-0-project.number, - lake-1 = module.lake-1-project.number, - lake-2 = module.lake-2-project.number, - lake-plg = module.lake-plg-project.number, - exposure = module.exp-project.number, - landing = module.land-project.number, - load = module.load-project.number, - orchestration = module.orch-project.number, - transformation = module.transf-project.number, + dwh-landing = module.dwh-lnd-project.number, + dwh-curated = module.dwh-cur-project.number, + dwh-confidential = module.dwh-conf-project.number, + dwh-plg = module.dwh-plg-project.number, + exposure = module.exp-project.number, + dropoff = module.drop-project.number, + load = module.load-project.number, + orchestration = module.orch-project.number, + transformation = module.transf-project.number, } project_id = { - lake-0 = module.lake-0-project.project_id, - lake-1 = module.lake-1-project.project_id, - lake-2 = module.lake-2-project.project_id, - lake-plg = module.lake-plg-project.project_id, - exposure = module.exp-project.project_id, - landing = module.land-project.project_id, - load = module.load-project.project_id, - orchestration = module.orch-project.project_id, - transformation = module.transf-project.project_id, + dwh-landing = module.dwh-lnd-project.project_id, + dwh-curated = module.dwh-cur-project.project_id, + dwh-confidential = module.dwh-conf-project.project_id, + dwh-plg = module.dwh-plg-project.project_id, + exposure = module.exp-project.project_id, + dropoff = module.drop-project.project_id, + load = module.load-project.project_id, + orchestration = module.orch-project.project_id, + transformation = module.transf-project.project_id, } } } @@ -93,12 +93,12 @@ output "vpc_subnet" { output "demo_commands" { description = "Demo commands." value = { - 01 = "gsutil -i ${module.land-sa-cs-0.email} cp demo/data/*.csv gs://${module.land-cs-0.name}" + 01 = "gsutil -i ${module.drop-sa-cs-0.email} cp demo/data/*.csv gs://${module.drop-cs-0.name}" 02 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/data/*.j* gs://${module.orch-cs-0.name}" 03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/" 04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG." 05 = < + Data Platform VPC-SC diagram +

+ ## How to run this stage This stage can be run in isolation by prviding the necessary variables, but it's really meant to be used as part of the FAST flow after the "foundational stages" ([`00-bootstrap`](../../00-bootstrap), [`01-resman`](../../01-resman), [`02-networking`](../../02-networking-vpn) and [`02-security`](../../02-security)). @@ -131,7 +137,7 @@ terraform apply ## Demo pipeline -The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features. +The application layer is out of scope of this script. For demo purposes only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `Data Warehouse Confidential` dataset using different features. You can find examples in the `[demo](../../../../examples/data-solutions/data-platform-foundations/demo)` folder. diff --git a/fast/stages/03-data-platform/dev/diagram.png b/fast/stages/03-data-platform/dev/diagram.png index 001c6f2a..79b46e17 100644 Binary files a/fast/stages/03-data-platform/dev/diagram.png and b/fast/stages/03-data-platform/dev/diagram.png differ diff --git a/fast/stages/03-data-platform/dev/diagram_vpcsc.png b/fast/stages/03-data-platform/dev/diagram_vpcsc.png new file mode 100644 index 00000000..2bbaad0b Binary files /dev/null and b/fast/stages/03-data-platform/dev/diagram_vpcsc.png differ diff --git a/tools/requirements.txt b/tools/requirements.txt index 3a6cb816..8c7dd54a 100644 --- a/tools/requirements.txt +++ b/tools/requirements.txt @@ -1,4 +1,5 @@ click +deepdiff marko requests yamale
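The README changes above suggest assigning roles at the BigQuery dataset level, through the `iam` variable of the `bigquery-dataset` module, when several analyst groups share the same Data Warehouse projects but should only see their own datasets. The snippet below is a minimal sketch of that pattern and is not part of this change set: it assumes the `bigquery-dataset` module accepts an `iam` map of role to member lists (as the README states), and the analyst group email is a placeholder. The existing `dwh-conf-bq-0` module could then carry the binding, for example:

```hcl
# Illustrative sketch, not part of the change set: grant one analyst group
# read access on a single Data Warehouse Confidential dataset instead of
# binding roles/bigquery.dataViewer on the whole project.
module "dwh-conf-bq-0" {
  source         = "../../../modules/bigquery-dataset"
  project_id     = module.dwh-conf-project.project_id
  id             = "${replace(var.prefix, "-", "_")}_dwh_conf_bq_0"
  location       = var.location
  encryption_key = try(local.service_encryption_keys.bq, null)
  iam = {
    # Hypothetical per-team group; replace with the real analyst group.
    "roles/bigquery.dataViewer" = ["group:team-a-data-analysts@example.com"]
  }
}
```

With a setup like this, the project-level binding for the `gcp-data-analysts` group in the `dwh-conf-project` module's `group_iam` map would be removed, so analyst access is scoped per dataset rather than per project.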