diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 6998eafe..080422e7 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -62,3 +62,8 @@ jobs: id: documentation-links-fabric run: | python3 tools/check_links.py . + + - name: Check name length (fast) + id: name-length-fast + run: | + python3 tools/check_names.py --prefix-length=10 fast/stages diff --git a/examples/data-solutions/data-platform-foundations/01-landing.tf b/examples/data-solutions/data-platform-foundations/01-landing.tf index 460fc283..fa527803 100644 --- a/examples/data-solutions/data-platform-foundations/01-landing.tf +++ b/examples/data-solutions/data-platform-foundations/01-landing.tf @@ -31,16 +31,15 @@ module "land-project" { "roles/bigquery.dataEditor", "roles/pubsub.editor", "roles/storage.admin", - "roles/storage.objectViewer", ] } iam = { - "roles/bigquery.dataEditor" = [module.land-sa-bq-0.iam_email] - "roles/bigquery.dataViewer" = local.land_orch_service_accounts - "roles/bigquery.jobUser" = [module.orch-sa-cmp-0.iam_email] - "roles/bigquery.user" = [module.load-sa-df-0.iam_email] - "roles/pubsub.publisher" = [module.land-sa-ps-0.iam_email] - "roles/pubsub.subscriber" = local.land_orch_service_accounts + "roles/bigquery.dataEditor" = [module.land-sa-bq-0.iam_email] + "roles/bigquery.user" = [module.load-sa-df-0.iam_email] + "roles/pubsub.publisher" = [module.land-sa-ps-0.iam_email] + "roles/pubsub.subscriber" = concat( + local.land_orch_service_accounts, [module.load-sa-df-0.iam_email] + ) "roles/storage.objectAdmin" = [module.load-sa-df-0.iam_email] "roles/storage.objectCreator" = [module.land-sa-cs-0.iam_email] "roles/storage.objectViewer" = [module.orch-sa-cmp-0.iam_email] @@ -65,12 +64,11 @@ module "land-project" { # Cloud Storage module "land-sa-cs-0" { - source = "../../../modules/iam-service-account" - project_id = module.land-project.project_id - prefix = var.prefix - name = "lnd-cs-0" - # TODO: descriptive name - 
display_name = "TODO" + source = "../../../modules/iam-service-account" + project_id = module.land-project.project_id + prefix = var.prefix + name = "lnd-cs-0" + display_name = "Data platform GCS landing service account." iam = { "roles/iam.serviceAccountTokenCreator" = [ local.groups_iam.data-engineers @@ -83,8 +81,8 @@ module "land-cs-0" { project_id = module.land-project.project_id prefix = var.prefix name = "lnd-cs-0" - location = var.region - storage_class = "REGIONAL" + location = var.location + storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy # retention_policy = { @@ -96,12 +94,11 @@ module "land-cs-0" { # PubSub module "land-sa-ps-0" { - source = "../../../modules/iam-service-account" - project_id = module.land-project.project_id - prefix = var.prefix - name = "lnd-ps-0" - # TODO: descriptive name - display_name = "TODO" + source = "../../../modules/iam-service-account" + project_id = module.land-project.project_id + prefix = var.prefix + name = "lnd-ps-0" + display_name = "Data platform PubSub landing service account" iam = { "roles/iam.serviceAccountTokenCreator" = [ local.groups_iam.data-engineers @@ -119,12 +116,11 @@ module "land-ps-0" { # BigQuery module "land-sa-bq-0" { - source = "../../../modules/iam-service-account" - project_id = module.land-project.project_id - prefix = var.prefix - name = "lnd-bq-0" - # TODO: descriptive name - display_name = "TODO" + source = "../../../modules/iam-service-account" + project_id = module.land-project.project_id + prefix = var.prefix + name = "lnd-bq-0" + display_name = "Data platform BigQuery landing service account" iam = { "roles/iam.serviceAccountTokenCreator" = [local.groups_iam.data-engineers] } @@ -134,6 +130,6 @@ module "land-bq-0" { source = "../../../modules/bigquery-dataset" project_id = module.land-project.project_id id = "${replace(var.prefix, "-", "_")}lnd_bq_0" - location = var.region + location = var.location 
encryption_key = try(local.service_encryption_keys.bq, null) } diff --git a/examples/data-solutions/data-platform-foundations/02-load.tf b/examples/data-solutions/data-platform-foundations/02-load.tf index 6696f4d3..7d7392f9 100644 --- a/examples/data-solutions/data-platform-foundations/02-load.tf +++ b/examples/data-solutions/data-platform-foundations/02-load.tf @@ -54,13 +54,6 @@ module "load-project" { ] "roles/dataflow.worker" = [module.load-sa-df-0.iam_email] "roles/storage.objectAdmin" = local.load_service_accounts - # TODO: these are needed on the shared VPC? - # "roles/compute.serviceAgent" = [ - # "serviceAccount:${module.load-project.service_accounts.robots.compute}" - # ] - # "roles/dataflow.serviceAgent" = [ - # "serviceAccount:${module.load-project.service_accounts.robots.dataflow}" - # ] } services = concat(var.project_services, [ "bigquery.googleapis.com", @@ -84,19 +77,15 @@ module "load-project" { attach = true host_project = local.shared_vpc_project service_identity_iam = {} - # service_identity_iam = { - # "compute.networkUser" = ["dataflow"] - # } } } module "load-sa-df-0" { - source = "../../../modules/iam-service-account" - project_id = module.load-project.project_id - prefix = var.prefix - name = "load-df-0" - # TODO: descriptive name - display_name = "TODO" + source = "../../../modules/iam-service-account" + project_id = module.load-project.project_id + prefix = var.prefix + name = "load-df-0" + display_name = "Data platform Dataflow load service account" iam = { "roles/iam.serviceAccountTokenCreator" = [local.groups_iam.data-engineers] "roles/iam.serviceAccountUser" = [module.orch-sa-cmp-0.iam_email] @@ -108,8 +97,8 @@ module "load-cs-df-0" { project_id = module.load-project.project_id prefix = var.prefix name = "load-cs-0" - storage_class = "REGIONAL" - location = var.region + location = var.location + storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) } diff --git 
a/examples/data-solutions/data-platform-foundations/03-composer.tf b/examples/data-solutions/data-platform-foundations/03-composer.tf index eec72fe9..87c5f4b5 100644 --- a/examples/data-solutions/data-platform-foundations/03-composer.tf +++ b/examples/data-solutions/data-platform-foundations/03-composer.tf @@ -15,12 +15,11 @@ # tfdoc:file:description Orchestration Cloud Composer definition. module "orch-sa-cmp-0" { - source = "../../../modules/iam-service-account" - project_id = module.orch-project.project_id - prefix = var.prefix - name = "orc-cmp-0" - # TODO: descriptive name - display_name = "TODO" + source = "../../../modules/iam-service-account" + project_id = module.orch-project.project_id + prefix = var.prefix + name = "orc-cmp-0" + display_name = "Data platform Composer service account" iam = { "roles/iam.serviceAccountTokenCreator" = [local.groups_iam.data-engineers] "roles/iam.serviceAccountUser" = [module.orch-sa-cmp-0.iam_email] @@ -54,6 +53,7 @@ resource "google_composer_environment" "orch-cmp-0" { image_version = var.composer_config.airflow_version env_variables = merge( var.composer_config.env_variables, { + BQ_LOCATION = var.location DTL_L0_PRJ = module.lake-0-project.project_id DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id DTL_L0_GCS = module.lake-0-cs-0.url diff --git a/examples/data-solutions/data-platform-foundations/03-orchestration.tf b/examples/data-solutions/data-platform-foundations/03-orchestration.tf index a2c53940..d8b9254e 100644 --- a/examples/data-solutions/data-platform-foundations/03-orchestration.tf +++ b/examples/data-solutions/data-platform-foundations/03-orchestration.tf @@ -42,21 +42,16 @@ module "orch-project" { "roles/composer.environmentAndStorageObjectAdmin", "roles/iap.httpsResourceAccessor", "roles/iam.serviceAccountUser", - "roles/compute.networkUser", "roles/storage.objectAdmin", "roles/storage.admin", - "roles/compute.networkUser" ] } iam = { "roles/bigquery.dataEditor" = [ module.load-sa-df-0.iam_email, 
module.transf-sa-df-0.iam_email, - module.orch-sa-cmp-0.iam_email, ] "roles/bigquery.jobUser" = [ - module.load-sa-df-0.iam_email, - module.transf-sa-df-0.iam_email, module.orch-sa-cmp-0.iam_email, ] "roles/composer.worker" = [ @@ -66,14 +61,10 @@ module "orch-project" { module.orch-sa-cmp-0.iam_email ] "roles/storage.objectAdmin" = [ - module.load-sa-df-0.iam_email, module.orch-sa-cmp-0.iam_email, "serviceAccount:${module.orch-project.service_accounts.robots.composer}", ] - "roles/storage.admin" = [ - module.load-sa-df-0.iam_email, - module.transf-sa-df-0.iam_email - ] + "roles/storage.objectViewer" = [module.load-sa-df-0.iam_email] } oslogin = false policy_boolean = { @@ -104,17 +95,6 @@ module "orch-project" { attach = true host_project = local.shared_vpc_project service_identity_iam = {} - # service_identity_iam = { - # "roles/composer.sharedVpcAgent" = [ - # "composer" - # ] - # "roles/compute.networkUser" = [ - # "cloudservices", "container-engine", "dataflow" - # ] - # "roles/container.hostServiceAgentUser" = [ - # "container-engine" - # ] - # } } } @@ -125,8 +105,8 @@ module "orch-cs-0" { project_id = module.orch-project.project_id prefix = var.prefix name = "orc-cs-0" - location = var.region - storage_class = "REGIONAL" + location = var.location + storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) } diff --git a/examples/data-solutions/data-platform-foundations/04-transformation.tf b/examples/data-solutions/data-platform-foundations/04-transformation.tf index 2cc7f420..66780c76 100644 --- a/examples/data-solutions/data-platform-foundations/04-transformation.tf +++ b/examples/data-solutions/data-platform-foundations/04-transformation.tf @@ -40,9 +40,6 @@ module "transf-project" { ] } iam = { - "roles/bigquery.dataViewer" = [ - module.orch-sa-cmp-0.iam_email - ] "roles/bigquery.jobUser" = [ module.transf-sa-bq-0.iam_email, ] @@ -54,7 +51,6 @@ module "transf-project" { ] "roles/storage.objectAdmin" = [ 
module.transf-sa-df-0.iam_email, - module.orch-sa-cmp-0.iam_email, "serviceAccount:${module.transf-project.service_accounts.robots.dataflow}" ] } @@ -85,12 +81,11 @@ module "transf-project" { # Cloud Storage module "transf-sa-df-0" { - source = "../../../modules/iam-service-account" - project_id = module.transf-project.project_id - prefix = var.prefix - name = "trf-df-0" - # TODO: descriptive name - display_name = "TODO" + source = "../../../modules/iam-service-account" + project_id = module.transf-project.project_id + prefix = var.prefix + name = "trf-df-0" + display_name = "Data platform Dataflow transformation service account" iam = { "roles/iam.serviceAccountTokenCreator" = [ local.groups_iam.data-engineers, @@ -107,20 +102,19 @@ module "transf-cs-df-0" { project_id = module.transf-project.project_id prefix = var.prefix name = "trf-cs-0" - location = var.region - storage_class = "REGIONAL" + location = var.location + storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) } # BigQuery module "transf-sa-bq-0" { - source = "../../../modules/iam-service-account" - project_id = module.transf-project.project_id - prefix = var.prefix - name = "trf-bq-0" - # TODO: descriptive name - display_name = "TODO" + source = "../../../modules/iam-service-account" + project_id = module.transf-project.project_id + prefix = var.prefix + name = "trf-bq-0" + display_name = "Data platform BigQuery transformation service account" iam = { "roles/iam.serviceAccountTokenCreator" = [ local.groups_iam.data-engineers, diff --git a/examples/data-solutions/data-platform-foundations/05-datalake.tf b/examples/data-solutions/data-platform-foundations/05-datalake.tf index 71ad0447..86f5edf6 100644 --- a/examples/data-solutions/data-platform-foundations/05-datalake.tf +++ b/examples/data-solutions/data-platform-foundations/05-datalake.tf @@ -29,31 +29,46 @@ locals { "roles/storage.objectViewer", ] } - lake_iam = { + lake_plg_group_iam = { + 
(local.groups.data-engineers) = [ + "roles/bigquery.dataEditor", + "roles/storage.admin", + ], + (local.groups.data-analysts) = [ + "roles/bigquery.dataEditor", + "roles/bigquery.jobUser", + "roles/bigquery.user", + "roles/datacatalog.viewer", + "roles/datacatalog.tagTemplateViewer", + "roles/storage.objectAdmin", + ] + } + lake_0_iam = { "roles/bigquery.dataEditor" = [ module.load-sa-df-0.iam_email, module.transf-sa-df-0.iam_email, module.transf-sa-bq-0.iam_email, - module.orch-sa-cmp-0.iam_email, ] "roles/bigquery.jobUser" = [ module.load-sa-df-0.iam_email, - module.transf-sa-df-0.iam_email, - ] - "roles/storage.admin" = [ - module.load-sa-df-0.iam_email, - module.transf-sa-df-0.iam_email, ] "roles/storage.objectCreator" = [ module.load-sa-df-0.iam_email, + ] + } + lake_iam = { + "roles/bigquery.dataEditor" = [ module.transf-sa-df-0.iam_email, module.transf-sa-bq-0.iam_email, - module.orch-sa-cmp-0.iam_email, + ] + "roles/bigquery.jobUser" = [ + module.transf-sa-bq-0.iam_email, + ] + "roles/storage.objectCreator" = [ + module.transf-sa-df-0.iam_email, ] "roles/storage.objectViewer" = [ module.transf-sa-df-0.iam_email, - module.transf-sa-bq-0.iam_email, - module.orch-sa-cmp-0.iam_email, ] } lake_services = concat(var.project_services, [ @@ -79,7 +94,7 @@ module "lake-0-project" { prefix = var.prefix name = "dtl-0" group_iam = local.lake_group_iam - iam = local.lake_iam + iam = local.lake_0_iam services = local.lake_services service_encryption_key_ids = { bq = [try(local.service_encryption_keys.bq, null)] @@ -123,8 +138,8 @@ module "lake-plg-project" { billing_account = var.billing_account_id prefix = var.prefix name = "dtl-plg" - group_iam = local.lake_group_iam - iam = local.lake_iam + group_iam = local.lake_plg_group_iam + iam = {} services = local.lake_services service_encryption_key_ids = { bq = [try(local.service_encryption_keys.bq, null)] @@ -138,7 +153,7 @@ module "lake-0-bq-0" { source = "../../../modules/bigquery-dataset" project_id = 
module.lake-0-project.project_id id = "${replace(var.prefix, "-", "_")}_dtl_0_bq_0" - location = var.region + location = var.location encryption_key = try(local.service_encryption_keys.bq, null) } @@ -146,7 +161,7 @@ module "lake-1-bq-0" { source = "../../../modules/bigquery-dataset" project_id = module.lake-1-project.project_id id = "${replace(var.prefix, "-", "_")}_dtl_1_bq_0" - location = var.region + location = var.location encryption_key = try(local.service_encryption_keys.bq, null) } @@ -154,7 +169,7 @@ module "lake-2-bq-0" { source = "../../../modules/bigquery-dataset" project_id = module.lake-2-project.project_id id = "${replace(var.prefix, "-", "_")}_dtl_2_bq_0" - location = var.region + location = var.location encryption_key = try(local.service_encryption_keys.bq, null) } @@ -162,7 +177,7 @@ module "lake-plg-bq-0" { source = "../../../modules/bigquery-dataset" project_id = module.lake-plg-project.project_id id = "${replace(var.prefix, "-", "_")}_dtl_plg_bq_0" - location = var.region + location = var.location encryption_key = try(local.service_encryption_keys.bq, null) } @@ -173,8 +188,8 @@ module "lake-0-cs-0" { project_id = module.lake-0-project.project_id prefix = var.prefix name = "dtl-0-cs-0" - location = var.region - storage_class = "REGIONAL" + location = var.location + storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy } @@ -184,8 +199,8 @@ module "lake-1-cs-0" { project_id = module.lake-1-project.project_id prefix = var.prefix name = "dtl-1-cs-0" - location = var.region - storage_class = "REGIONAL" + location = var.location + storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy } @@ -195,8 +210,8 @@ module "lake-2-cs-0" { project_id = module.lake-2-project.project_id prefix = var.prefix name = "dtl-2-cs-0" - location = var.region - storage_class = "REGIONAL" + location = 
var.location + storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy } @@ -206,8 +221,8 @@ module "lake-plg-cs-0" { project_id = module.lake-plg-project.project_id prefix = var.prefix name = "dtl-plg-cs-0" - location = var.region - storage_class = "REGIONAL" + location = var.location + storage_class = "MULTI_REGIONAL" encryption_key = try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy } diff --git a/examples/data-solutions/data-platform-foundations/06-common.tf b/examples/data-solutions/data-platform-foundations/06-common.tf index 126f947e..59279d23 100644 --- a/examples/data-solutions/data-platform-foundations/06-common.tf +++ b/examples/data-solutions/data-platform-foundations/06-common.tf @@ -46,10 +46,10 @@ module "common-project" { # module "cmn-kms-0" { # source = "../../../modules/kms" -# project_id = module.cmn-prj.project_id +# project_id = module.common-project.project_id # keyring = { # name = "${var.prefix}-kr-global", -# location = var.location_config.region +# location = "global" # } # keys = { # pubsub = null @@ -58,10 +58,10 @@ module "common-project" { # module "cmn-kms-1" { # source = "../../../modules/kms" -# project_id = module.cmn-prj.project_id +# project_id = module.common-project.project_id # keyring = { # name = "${var.prefix}-kr-mregional", -# location = var.location_config.region +# location = var.location # } # keys = { # bq = null @@ -74,7 +74,7 @@ module "common-project" { # project_id = module.cmn-prj.project_id # keyring = { # name = "${var.prefix}-kr-regional", -# location = var.location_config.region +# location = var.region # } # keys = { # composer = null diff --git a/examples/data-solutions/data-platform-foundations/IAM.md b/examples/data-solutions/data-platform-foundations/IAM.md new file mode 100644 index 00000000..aed1c405 --- /dev/null +++ b/examples/data-solutions/data-platform-foundations/IAM.md @@ -0,0 
+1,84 @@ +# IAM bindings reference + +Legend: + additive, conditional. + +## Project cmn + +| members | roles | +|---|---| +|gcp-data-engineers
group|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin)
[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | +|gcp-data-security
group|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) | +|load-df-0
serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | +|trf-df-0
serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | + +## Project dtl-0 + +| members | roles | +|---|---| +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|load-df-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | +|trf-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | +|trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | + +## Project dtl-1 + +| members | roles | +|---|---| +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|trf-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | +|trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | + +## Project dtl-2 + +| members | roles | +|---|---| +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|trf-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | +|trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | + +## Project dtl-plg + +| members | roles | +|---|---| +|gcp-data-analysts
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | + +## Project lnd + +| members | roles | +|---|---| +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/pubsub.editor](https://cloud.google.com/iam/docs/understanding-roles#pubsub.editor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|lnd-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | +|lnd-cs-0
serviceAccount|[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | +|lnd-ps-0
serviceAccount|[roles/pubsub.publisher](https://cloud.google.com/iam/docs/understanding-roles#pubsub.publisher) | +|load-df-0
serviceAccount|[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|orc-cmp-0
serviceAccount|[roles/pubsub.subscriber](https://cloud.google.com/iam/docs/understanding-roles#pubsub.subscriber)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | + +## Project lod + +| members | roles | +|---|---| +|gcp-data-engineers
group|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)
[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer)
[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) | +|load-df-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)
[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|orc-cmp-0
serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | + +## Project orc + +| members | roles | +|---|---| +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor)
[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)
[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)
[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|load-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|orc-cmp-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | + +## Project trf + +| members | roles | +|---|---| +|gcp-data-engineers
group|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | +|orc-cmp-0
serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | +|trf-bq-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | +|trf-df-0
serviceAccount|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | diff --git a/examples/data-solutions/data-platform-foundations/README.md b/examples/data-solutions/data-platform-foundations/README.md index 5da95db1..90ed73fe 100644 --- a/examples/data-solutions/data-platform-foundations/README.md +++ b/examples/data-solutions/data-platform-foundations/README.md @@ -1,6 +1,6 @@ # Data Platform -This module implements an opinionated Data Platform Architecture that creates and sets up projects and related resources, to be used to create your end to end data environment. +This module implements an opinionated Data Platform Architecture that creates and setup projects and related resources that compose an end-to-end data environment. The code is intentionally simple, as it's intended to provide a generic initial setup and then allow easy customizations to complete the implementation of the intended design. @@ -8,7 +8,7 @@ The following diagram is a high-level reference of the resources created and man ![Data Platform architecture overview](./images/overview_diagram.png "Data Platform architecture overview") -A demo pipeline is also part of this example: it can be built and run on top of the foundational infrastructure to verify or test the setup quickly. +A demo Airflow pipeline is also part of this example: it can be built and run on top of the foundational infrastructure to verify or test the setup quickly. ## Design overview and choices @@ -17,11 +17,11 @@ Despite its simplicity, this stage implements the basics of a design that we've The approach adapts to different high-level requirements: - boundaries for each step -- clear and defined actors +- clearly defined actors - least privilege principle - rely on service account impersonation -The code in this example doesn't address Organization level configuration (Organization policy, VPC-SC, centralized logs). We expect to address those aspects on stages external to this script. 
+The code in this example doesn't address Organization-level configurations (Organization policy, VPC-SC, centralized logs). We expect those elements to be managed by automation stages external to this script like those in [FAST](../../../fast). ### Project structure @@ -34,89 +34,84 @@ The Data Platform is designed to rely on several projects, one project per data - transformation - exposure -This separation into projects allows adhering the least-privilege principle relying on project-level roles. +This separation into projects allows adhering to the least-privilege principle by using project-level roles. The script will create the following projects: -- **Landing** This project is intended to store data temporarily. Data are pushed to Cloud Storage, BigQuery, or Cloud PubSub. Resource configured with 3-months lifecycle policy. -- **Load** This project is intended to load data from `landing` to the `data lake`. The load is made with minimal to zero transformation logic (mainly `cast`). This stage can anonymization/tokenization Personally Identifiable Information (PII). Alternatively, it can be done in the transformation stage depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended. -- **Data Lake** projects where data are stored. itìs composed of 3 layers that progressively process and define data: - - **L0 - Raw data** Structured Data, stored in the adequate format: structured data stored in BigQuery, unstructured data stored on Cloud Storage with additional metadata stored in BigQuery (for example pictures stored in Cloud Storage and analysis of the images for Cloud Vision API stored in BigQuery). +- **Landing** Used to store temporary data. Data is pushed to Cloud Storage, BigQuery, or Cloud PubSub. Resources are configured with a customizable lifecycle policy. +- **Load** Used to load data from landing to data lake. 
The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended. +- **Data Lake** Several projects distributed across 3 separate layers, to host progressively processed and refined data: + - **L0 - Raw data** Structured Data, stored in relevant formats: structured data stored in BigQuery, unstructured data stored on Cloud Storage with additional metadata stored in BigQuery (for example pictures stored in Cloud Storage and analysis of the images for Cloud Vision API stored in BigQuery). - **L1 - Cleansed, aggregated and standardized data** - **L2 - Curated layer** - - **Playground** Store temporary tables that Data Analyst may use to perform R&D on data available on other Data Lake layers -- **Orchestration** This project is intended to host Cloud Composer. Cloud Composer will orchestrate all tasks to move your data on its journey. -- **Transformation** This project is used to move data between layers of the Data Lake. We strongly suggest relying on BigQuery engine to perform transformations. If BigQuery doesn't have the feature needed to perform your transformation you recommend using Cloud Dataflow together with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can optionally be used to anonymiza/tokenize PII. -- **Exposure** This project is intended to host resources to share your processed data with external systems your data. For the porpuse of this example we leace this project empty. Depending on the access pattern, data can be presented on Cloud SQL, BigQuery, or Bigtable. For BigQuery data, we strongly suggest relying on [Authorized views](https://cloud.google.com/bigquery/docs/authorized-views). 
+ - **Playground** Temporary tables that Data Analyst may use to perform R&D on data available in other Data Lake layers. +- **Orchestration** Used to host Cloud Composer, which orchestrates all tasks that move data across layers. +- **Transformation** Used to move data between Data Lake layers. We strongly suggest relying on BigQuery Engine to perform the transformations. If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII. +- **Exposure** Used to host resources that share processed data with external systems. Depending on the access pattern, data can be presented via Cloud SQL, BigQuery, or Bigtable. For BigQuery data, we strongly suggest relying on [Authorized views](https://cloud.google.com/bigquery/docs/authorized-views). ### Roles -We assign roles on resources at project level setting the appropriate role to groups. We recommend not adding human users directly to the resource-access groups with IAM permissions to access data. +We assign roles on resources at the project level, granting the appropriate roles via groups (humans) and service accounts (services and applications) according to best practices. ### Service accounts -Service account creation follows the least privilege principle, performing a single task which requires access to a defined set of resources. In the table below you can find an high level overview on roles for each service account on each data layer. For semplicy `READ` or `WRITE` roles are used, for detailed roles please refer to the code. - +Service account creation follows the least privilege principle, performing a single task which requires access to a defined set of resources. 
The table below shows a high level overview of roles for each service account on each data layer, using `READ` or `WRITE` access patterns for simplicity. For detailed roles please refer to the code. |Service Account|Landing|DataLake L0|DataLake L1|DataLake L2| |-|:-:|:-:|:-:|:-:| -|landing-sa|WRITE|-|-|-| -|load-sa|READ|READ/WRITE|-|-| -|transformation-sa|-|READ/WRITE|READ/WRITE|READ/WRITE| -|orchestration-sa|-|-|-|-| +|`landing-sa`|`WRITE`|-|-|-| +|`load-sa`|`READ`|`READ`/`WRITE`|-|-| +|`transformation-sa`|-|`READ`/`WRITE`|`READ`/`WRITE`|`READ`/`WRITE`| +|`orchestration-sa`|-|-|-|-| -- Each service account perform a single task having access to the minimum number of resources (example: the Cloud Dataflow Service Account has access to the Landing project and the Data Lake L0 project) -- Each Service Account has the least privilege on each project. +A full reference of IAM roles managed by the Data Platform [is available here](./IAM.md). -#### Service Account Keys - -The use of SAK within a data pipeline incurs several security risks, as these credentials, that could be leaked without oversight or control. This example relies on Service Account Impersonation to avoid the creation of private keys. +Using service account keys within a data pipeline exposes you to several security risks deriving from a credentials leak. This example shows how to leverage impersonation to avoid the need to create keys. ### User groups -User groups are important. They provide a stable frame of reference that allows decoupling the final set of permissions for each group, from the stage where entities and resources are created and their IAM bindings defined. +User groups provide a stable frame of reference that allows decoupling the final set of permissions from the stage where entities and resources are created, and their IAM bindings defined.
We use three groups to control access to resources: - *Data Engineers* They handle and run the Data Hub, with read access to all resources in order to troubleshoot possible issues with pipelines. This team can also impersonate any service account. -- *Data Analyst*. They perform analysis on datasets, with read access to the data lake L2 project, and BigQuery READ/WRITE access to the playground project. +- *Data Analysts*. They perform analysis on datasets, with read access to the data lake L2 project, and BigQuery READ/WRITE access to the playground project. - *Data Security*:. They handle security configurations related to the Data Hub. This team has admin access to the common project to configure Cloud DLP templates or Data Catalog policy tags. -In the table below you can find an high level overview on roles for each group on each project. For semplicy `READ`, `WRITE` and `ADMIN` roles are used, for detailed roles please refer to the code. +The table below shows a high level overview of roles for each group on each project, using `READ`, `WRITE` and `ADMIN` access patterns for simplicity. For detailed roles please refer to the code. |Group|Landing|Load|Transformation|Data Lake L0|Data Lake L1|Data Lake L2|Data Lake Playground|Orchestration|Common| |-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| -|Data Engineers|ADMIN|ADMIN|ADMIN|ADMIN|ADMIN|ADMIN|ADMIN|ADMIN|ADMIN| -|Data Analyst|-|-|-|-|-|READ|READ/WRITE|-|-| -|Data Security|-|-|-|-|-|-|-|-|ADMIN| +|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`| +|Data Analysts|-|-|-|-|-|`READ`|`READ`/`WRITE`|-|-| +|Data Security|-|-|-|-|-|-|-|-|`ADMIN`| -### Groups - -We use thress groups based on the required access: - -- *Data Engineers*: the group that handles and runs the Data Hub. The group has Read access to all resources to troubleshoot possible issues with the pipeline. The team also can impersonate all service accounts. Default value: `gcp-data-engineers@DOMAIN.COM`. 
-- *Data Analyst*: the group that performs analysis on the dataset. The group has Read access to the Data Lake L2 project and BigQuery READ/WRITE access to the `playground` project. Default value: `gcp-data-analyst@DOMAIN.COM` -- *Data Security*: the group handling security configurations related to the Data Hub. Default name: `gcp-data-security@DOMAIN.com` +You can configure groups via the `groups` variable. ### Virtual Private Cloud (VPC) design -The Data Platform accepts as input an existing [Shared-VPC](https://cloud.google.com/vpc/docs/shared-vpc) to run resources. You can configure subnets for data resources by specifying the link to the subnet in the `network_config` variable. You may want to configure a shared-VPC to host your resources if your pipelines may need to reach on-premise resources. +As is often the case in real-world configurations, this example accepts as input an existing [Shared-VPC](https://cloud.google.com/vpc/docs/shared-vpc) via the `network_config` variable. Make sure that the GKE API (`container.googleapis.com`) is enabled in the VPC host project. -If `network_config` variable is not provided, the script will create a VPC on each project that requires a VPC: *load*, *transformation*, and *orchestration* projects with the default configuration. +If the `network_config` variable is not provided, one VPC will be created in each project that supports network resources (load, transformation and orchestration). -### IP ranges, subnetting +### IP ranges and subnetting -To deploy your Data Platform you need the following ranges: +To deploy this example with self-managed VPCs you need the following ranges: -- Load project VPC for Cloud Dataflow workers. Range: '/24'. -- Transformation VPC for Cloud Dataflow workers. Range: '/24'. -- Orchestration VPC for Cloud Composer: - - Cloud SQL. Range: '/24' - - GKE Master. Range: '/28' - - Web Server: Range: '/28' - - Secondary IP ranges. 
Pods range: '/22', Services range: '/24' +- one /24 for the load project VPC subnet used for Cloud Dataflow workers +- one /24 for the transformation VPC subnet used for Cloud Dataflow workers +- one /24 range for the orchestration VPC subnet used for Composer workers +- one /22 and one /24 ranges for the secondary ranges associated with the orchestration VPC subnet -### Resource naming convention +If you are using Shared VPC, you need one subnet with one /22 and one /24 secondary range defined for Composer pods and services. + +In both VPC scenarios, you also need these ranges for Composer: + +- one /24 for Cloud SQL +- one /28 for the GKE control plane +- one /28 for the web server + +### Resource naming conventions Resources follow the naming convention described below. @@ -126,34 +121,39 @@ Resources follow the naming convention described below. ### Encryption -We suggest a centralized approach to key management, where Organization Security is the only team that can access encryption material, and keyrings and keys are managed in a project external to the DP. +We suggest a centralized approach to key management, where Organization Security is the only team that can access encryption material, and keyrings and keys are managed in a project external to the Data Platform. ![Centralized Cloud Key Management high-level diagram](./images/kms_diagram.png "Centralized Cloud Key Management high-level diagram") -To configure the use of Cloud Key Management on resources you have to specify the key URL on the 'service_encryption_keys'. Keys location should match the resource location. Example: +To configure the use of Cloud KMS on resources, you have to specify the key id on the `service_encryption_keys` variable. Key locations should match resource locations. 
Example: ```hcl service_encryption_keys = { - bq = "KEY_URL_MULTIREGIONAL" - composer = "KEY_URL_REGIONAL" - dataflow = "KEY_URL_REGIONAL" - storage = "KEY_URL_MULTIREGIONAL" - pubsub = "KEY_URL_MULTIREGIONAL" + bq = "KEY_URL_MULTIREGIONAL" + composer = "KEY_URL_REGIONAL" + dataflow = "KEY_URL_REGIONAL" + storage = "KEY_URL_MULTIREGIONAL" + pubsub = "KEY_URL_MULTIREGIONAL" +} ``` -We consider this step optional, it depends on customer policy and security best practices. +This step is optional and depends on customer policies and security best practices. ## Data Anonymization -We suggest using Cloud Data Loss Prevention to identify/mask/tokenize your confidential data. Implementing the Data Loss Prevention strategy is out of scope for this example. We enable the service in 2 different projects to implement the data loss prevention strategy. We expect you will use [Cloud Data Loss Prevention templates](https://cloud.google.com/dlp/docs/concepts-templates) in one of the following ways: +We suggest using Cloud Data Loss Prevention to identify/mask/tokenize your confidential data. -- During the ingestion phase, from Dataflow -- During the transformation phase, from [BigQuery](https://cloud.google.com/bigquery/docs/scan-with-dlp) or [Cloud Dataflow](https://cloud.google.com/architecture/running-automated-dataflow-pipeline-de-identify-pii-dataset) +While implementing a Data Loss Prevention strategy is out of scope for this example, we enable the service in two different projects so that [Cloud Data Loss Prevention templates](https://cloud.google.com/dlp/docs/concepts-templates) can be configured in one of two ways: -We implemented a centralized model for Cloud Data Loss Prevention resources. 
Templates will be stored in the security project: +- during the ingestion phase, from Dataflow +- during the transformation phase, from [BigQuery](https://cloud.google.com/bigquery/docs/scan-with-dlp) or [Cloud Dataflow](https://cloud.google.com/architecture/running-automated-dataflow-pipeline-de-identify-pii-dataset) + +Cloud Data Loss Prevention resources and templates should be stored in the security project: ![Centralized Cloud Data Loss Prevention high-level diagram](./images/dlp_diagram.png "Centralized Cloud Data Loss Prevention high-level diagram") +You can find more details and best practices on using DLP to De-identification and re-identification of PII in large-scale datasets in the [GCP documentation](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp). + ## How to run this script To deploy this example on your GCP organization, you will need @@ -163,35 +163,26 @@ To deploy this example on your GCP organization, you will need The Data Platform is meant to be executed by a Service Account (or a regular user) having this minimal set of permission: -- Org level - - `"compute.organizations.enableXpnResource"` - - `"compute.organizations.disableXpnResource"` - - `"compute.subnetworks.setIamPolicy"` -- Folder level - - `"roles/logging.admin"` - - `"roles/owner"` - - `"roles/resourcemanager.folderAdmin"` - - `"roles/resourcemanager.projectCreator"` -- Cloud Key Management Keys** (if Cloud Key Management keys are configured): - - `"roles/cloudkms.admin"` or Permissions: `cloudkms.cryptoKeys.getIamPolicy`, `cloudkms.cryptoKeys.list`, `cloudkms.cryptoKeys.setIamPolicy` -- on the host project for the Shared VPC/s - - `"roles/browser"` - - `"roles/compute.viewer"` - - `"roles/dns.admin"` +- **Billing account** + - `roles/billing.user` +- **Folder level**: + - `roles/resourcemanager.folderAdmin` + - `roles/resourcemanager.projectCreator` +- **KMS Keys** (If CMEK encryption in use): + - `roles/cloudkms.admin` or a custom 
role with `cloudkms.cryptoKeys.getIamPolicy`, `cloudkms.cryptoKeys.list`, `cloudkms.cryptoKeys.setIamPolicy` permissions +- **Shared VPC host project** (if configured): + - `roles/compute.xpnAdmin` on the host project folder or org + - `roles/resourcemanager.projectIamAdmin` on the host project, either with no conditions or with a condition allowing [delegated role grants](https://medium.com/google-cloud/managing-gcp-service-usage-through-delegated-role-grants-a843610f2226#:~:text=Delegated%20role%20grants%20is%20a,setIamPolicy%20permission%20on%20a%20resource.) for `roles/compute.networkUser`, `roles/composer.sharedVpcAgent`, `roles/container.hostServiceAgentUser` ## Variable configuration There are three sets of variables you will need to fill in: ```hcl -prefix = "PRFX" -project_create = { - parent = "folders/123456789012" - billing_account_id = "111111-222222-333333" -} -organization = { - domain = "DOMAIN.com" -} +billing_account_id = "111111-222222-333333" +folder_id = "folders/123456789012" +organization_domain = "domain.com" +prefix = "myco" ``` For more fine details check variables on [`variables.tf`](./variables.tf) and update according to the desired configuration. Remember to create team groups described [below](#groups). @@ -234,15 +225,16 @@ Description of commands: | name | description | type | required | default | |---|---|:---:|:---:|:---:| | [billing_account_id](variables.tf#L17) | Billing account id. | string | ✓ | | -| [folder_id](variables.tf#L41) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ | | -| [organization_domain](variables.tf#L79) | Organization domain. | string | ✓ | | -| [prefix](variables.tf#L84) | Unique prefix used for resource names. | string | ✓ | | -| [composer_config](variables.tf#L22) | | object({…}) | | {…} | -| [data_force_destroy](variables.tf#L35) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | -| [groups](variables.tf#L46) | Groups. 
| map(string) | | {…} | -| [network_config](variables.tf#L56) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | -| [project_services](variables.tf#L89) | List of core services enabled on all projects. | list(string) | | […] | -| [region](variables.tf#L100) | Region used for regional resources. | string | | "europe-west1" | +| [folder_id](variables.tf#L42) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ | | +| [organization_domain](variables.tf#L86) | Organization domain. | string | ✓ | | +| [prefix](variables.tf#L91) | Unique prefix used for resource names. | string | ✓ | | +| [composer_config](variables.tf#L22) | Cloud Composer config. | object({…}) | | {…} | +| [data_force_destroy](variables.tf#L36) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | +| [groups](variables.tf#L53) | User groups. | map(string) | | {…} | +| [location](variables.tf#L47) | Location used for multi-regional resources. | string | | "eu" | +| [network_config](variables.tf#L63) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | +| [project_services](variables.tf#L96) | List of core services enabled on all projects. | list(string) | | […] | +| [region](variables.tf#L107) | Region used for regional resources. | string | | "europe-west1" | ## Outputs @@ -257,18 +249,17 @@ Description of commands: | [vpc_subnet](outputs.tf#L84) | VPC subnetworks. 
| | - ## TODOs -Features to add in future releases +Features to add in future releases: -- add support for column level access on BigQuery -- add example templates for Data Catalog -- add example on how to use Cloud Data Loss Prevention -- add solution to handle tables, views, and authorized views lifecycle -- add solution to handle metadata lifecycle +- Add support for Column level access on BigQuery +- Add example templates for Data Catalog +- Add example on how to use Cloud Data Loss Prevention +- Add solution to handle Tables, Views, and Authorized Views lifecycle +- Add solution to handle Metadata lifecycle -Fixes +## To Test/Fix -- composer requires "Require OS Login" not enforced -- external Shared VPC +- Composer requires "Require OS Login" not enforced +- External Shared-VPC diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py index 6581348b..878d641a 100644 --- a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py +++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py @@ -30,6 +30,7 @@ from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJob # -------------------------------------------------------------------------------- # Set variables # ------------------------------------------------------------
with models.DAG( task_id='bq_join_customer_purchase', gcp_conn_id='bigquery_default', project_id=TRF_PRJ, - location=BQ_REGION, + location=BQ_LOCATION, configuration={ 'jobType':'QUERY', 'query':{ @@ -172,7 +173,7 @@ with models.DAG( task_id='bq_l2_customer_purchase', gcp_conn_id='bigquery_default', project_id=TRF_PRJ, - location=BQ_REGION, + location=BQ_LOCATION, configuration={ 'jobType':'QUERY', 'query':{ diff --git a/examples/data-solutions/data-platform-foundations/main.tf b/examples/data-solutions/data-platform-foundations/main.tf index d51c7597..59a2f33b 100644 --- a/examples/data-solutions/data-platform-foundations/main.tf +++ b/examples/data-solutions/data-platform-foundations/main.tf @@ -15,6 +15,20 @@ # tfdoc:file:description Core locals. locals { + # we cannot reference service accounts directly as they are dynamic + _shared_vpc_bindings = { + "roles/compute.networkUser" = [ + "load-robot-df", "load-sa-df-worker", + "orch-cloudservices", "orch-robot-df", "orch-robot-gke", + "transf-robot-df", "transf-sa-df-worker", + ] + "roles/composer.sharedVpcAgent" = [ + "orch-robot-cs" + ] + "roles/container.hostServiceAgentUser" = [ + "orch-robot-df" + ] + } groups = { for k, v in var.groups : k => "${v}@${var.organization_domain}" } @@ -23,31 +37,31 @@ locals { } service_encryption_keys = var.service_encryption_keys shared_vpc_project = try(var.network_config.host_project, null) - use_shared_vpc = var.network_config != null + # this is needed so that for_each only uses static values + shared_vpc_role_members = { + load-robot-df = module.load-project.service_accounts.robots.dataflow + load-sa-df-worker = module.load-sa-df-0.iam_email + orch-cloudservices = module.orch-project.service_accounts.cloud_services + orch-robot-cs = module.orch-project.service_accounts.robots.composer + orch-robot-df = module.orch-project.service_accounts.robots.dataflow + orch-robot-gke = module.orch-project.service_accounts.robots.container-engine + transf-robot-df = 
module.transf-project.service_accounts.robots.dataflow + transf-sa-df-worker = module.transf-sa-df-0.iam_email + } + # reassemble in a format suitable for for_each + shared_vpc_bindings_map = { + for binding in flatten([ + for role, members in local._shared_vpc_bindings : [ + for member in members : { role = role, member = member } + ] + ]) : "${binding.role}-${binding.member}" => binding + } + use_shared_vpc = var.network_config != null } -module "shared-vpc-project" { - source = "../../../modules/project" - count = local.use_shared_vpc ? 1 : 0 - name = var.network_config.host_project - project_create = false - iam_additive = { - "roles/compute.networkUser" = [ - # load Dataflow service agent and worker service account - module.load-project.service_accounts.robots.dataflow, - module.load-sa-df-0.iam_email, - # orchestration Composer service agents - module.orch-project.service_accounts.robots.cloudservices, - module.orch-project.service_accounts.robots.container-engine, - module.orch-project.service_accounts.robots.dataflow, - ], - "roles/composer.sharedVpcAgent" = [ - # orchestration Composer service agent - module.orch-project.service_accounts.robots.composer - ], - "roles/container.hostServiceAgentUser" = [ - # orchestration Composer service agents - module.orch-project.service_accounts.robots.dataflow, - ] - } +resource "google_project_iam_member" "shared_vpc" { + for_each = local.use_shared_vpc ? 
local.shared_vpc_bindings_map : {} + project = var.network_config.host_project + role = each.value.role + member = lookup(local.shared_vpc_role_members, each.value.member) } diff --git a/examples/data-solutions/data-platform-foundations/terraform.tfvars.sample b/examples/data-solutions/data-platform-foundations/terraform.tfvars.sample index 2d43e583..e530499f 100644 --- a/examples/data-solutions/data-platform-foundations/terraform.tfvars.sample +++ b/examples/data-solutions/data-platform-foundations/terraform.tfvars.sample @@ -1,8 +1,4 @@ -prefix = "prefix" -project_create = { - parent = "folders/123456789012" - billing_account_id = "111111-222222-333333" -} -organization = { - domain = "example.com" -} +prefix = "prefix" +folder_id = "folders/123456789012" +billing_account_id = "111111-222222-333333" +organization_domain = "example.com" diff --git a/examples/data-solutions/data-platform-foundations/variables.tf b/examples/data-solutions/data-platform-foundations/variables.tf index 35dc3862..f403243a 100644 --- a/examples/data-solutions/data-platform-foundations/variables.tf +++ b/examples/data-solutions/data-platform-foundations/variables.tf @@ -20,6 +20,7 @@ variable "billing_account_id" { } variable "composer_config" { + description = "Cloud Composer config." type = object({ node_count = number airflow_version = string @@ -43,8 +44,14 @@ variable "folder_id" { type = string } +variable "location" { + description = "Location used for multi-regional resources." + type = string + default = "eu" +} + variable "groups" { - description = "Groups." + description = "User groups." type = map(string) default = { data-analysts = "gcp-data-analysts" diff --git a/fast/stages/00-bootstrap/IAM.md b/fast/stages/00-bootstrap/IAM.md index 3d4fdbc5..3fc844a3 100644 --- a/fast/stages/00-bootstrap/IAM.md +++ b/fast/stages/00-bootstrap/IAM.md @@ -20,7 +20,7 @@ Legend: + additive, conditional. |---|---| |prod-bootstrap-0
serviceAccount|[roles/owner](https://cloud.google.com/iam/docs/understanding-roles#owner) | -## Project prod-billing-export-0 +## Project prod-billing-exp-0 | members | roles | |---|---| diff --git a/fast/stages/00-bootstrap/README.md b/fast/stages/00-bootstrap/README.md index 52a2e107..0add953b 100644 --- a/fast/stages/00-bootstrap/README.md +++ b/fast/stages/00-bootstrap/README.md @@ -66,7 +66,7 @@ We are intentionally not supporting random prefix/suffixes for names, as that is What is implemented here is a fairly common convention, composed of tokens ordered by relative importance: -- a static prefix (e.g. `myco` or `myco-gcp`) +- a static prefix less or equal to 9 characters (e.g. `myco` or `myco-gcp`) - an environment identifier (e.g. `prod`) - a team/owner identifier (e.g. `sec` for Security) - a context identifier (e.g. `core` or `kms`) @@ -169,7 +169,7 @@ Then make sure you have configured the correct values for the following variable - `organization.id`, `organization.domain`, `organization.customer_id` the id, domain and customer id of your organization, derived from the Cloud Console UI or by running `gcloud organizations list` - `prefix` - the fixed prefix used in your naming convention + the fixed prefix used in your naming, maximum 9 characters long You can also adapt the example that follows to your needs: @@ -337,7 +337,7 @@ Names used in internal references (e.g. `module.foo-prod.id`) are only used by T |---|---|:---:|:---:|:---:|:---:| | [billing_account](variables.tf#L17) | Billing account id and organization id ('nnnnnnnn' or null). | object({…}) | ✓ | | | | [organization](variables.tf#L96) | Organization details. | object({…}) | ✓ | | | -| [prefix](variables.tf#L111) | Prefix used for resources that need unique names. | string | ✓ | | | +| [prefix](variables.tf#L111) | Prefix used for resources that need unique names. Use 9 characters or less. 
| string | ✓ | | | | [bootstrap_user](variables.tf#L25) | Email of the nominal user running this stage for the first time. | string | | null | | | [custom_role_names](variables.tf#L31) | Names of custom roles defined at the org level. | object({…}) | | {…} | | | [groups](variables.tf#L43) | Group names to grant organization-level permissions. | map(string) | | {…} | | diff --git a/fast/stages/00-bootstrap/billing.tf b/fast/stages/00-bootstrap/billing.tf index e8227041..70053c71 100644 --- a/fast/stages/00-bootstrap/billing.tf +++ b/fast/stages/00-bootstrap/billing.tf @@ -31,7 +31,7 @@ module "billing-export-project" { source = "../../../modules/project" count = local.billing_org ? 1 : 0 billing_account = var.billing_account.id - name = "billing-export-0" + name = "billing-exp-0" parent = "organizations/${var.organization.id}" prefix = local.prefix iam = { diff --git a/fast/stages/00-bootstrap/variables.tf b/fast/stages/00-bootstrap/variables.tf index 33709703..68d54650 100644 --- a/fast/stages/00-bootstrap/variables.tf +++ b/fast/stages/00-bootstrap/variables.tf @@ -109,6 +109,11 @@ variable "outputs_location" { } variable "prefix" { - description = "Prefix used for resources that need unique names." + description = "Prefix used for resources that need unique names. Use 9 characters or less." type = string + + validation { + condition = try(length(var.prefix), 0) < 10 + error_message = "Use a maximum of 9 characters for prefix." + } } diff --git a/fast/stages/01-resman/IAM.md b/fast/stages/01-resman/IAM.md index f915bb20..269f5f09 100644 --- a/fast/stages/01-resman/IAM.md +++ b/fast/stages/01-resman/IAM.md @@ -7,38 +7,38 @@ Legend: + additive, conditional. | members | roles | |---|---| |dev-resman-pf-0
serviceAccount|[roles/billing.costsManager](https://cloud.google.com/iam/docs/understanding-roles#billing.costsManager) +
[roles/billing.user](https://cloud.google.com/iam/docs/understanding-roles#billing.user) +
[roles/orgpolicy.policyAdmin](https://cloud.google.com/iam/docs/understanding-roles#orgpolicy.policyAdmin) +| -|prod-resman-networking-0
serviceAccount|[roles/billing.user](https://cloud.google.com/iam/docs/understanding-roles#billing.user) +
[roles/compute.orgFirewallPolicyAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.orgFirewallPolicyAdmin) +
[roles/compute.xpnAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.xpnAdmin) +| +|prod-resman-net-0
serviceAccount|[roles/billing.user](https://cloud.google.com/iam/docs/understanding-roles#billing.user) +
[roles/compute.orgFirewallPolicyAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.orgFirewallPolicyAdmin) +
[roles/compute.xpnAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.xpnAdmin) +| |prod-resman-pf-0
serviceAccount|[roles/billing.costsManager](https://cloud.google.com/iam/docs/understanding-roles#billing.costsManager) +
[roles/billing.user](https://cloud.google.com/iam/docs/understanding-roles#billing.user) +
[roles/orgpolicy.policyAdmin](https://cloud.google.com/iam/docs/understanding-roles#orgpolicy.policyAdmin) +| -|prod-resman-security-0
serviceAccount|[roles/accesscontextmanager.policyAdmin](https://cloud.google.com/iam/docs/understanding-roles#accesscontextmanager.policyAdmin) +
[roles/billing.user](https://cloud.google.com/iam/docs/understanding-roles#billing.user) +| +|prod-resman-sec-0
serviceAccount|[roles/accesscontextmanager.policyAdmin](https://cloud.google.com/iam/docs/understanding-roles#accesscontextmanager.policyAdmin) +
[roles/billing.user](https://cloud.google.com/iam/docs/understanding-roles#billing.user) +| + +## Folder development + +| members | roles | +|---|---| +|dev-resman-pf-0
serviceAccount|[roles/compute.xpnAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.xpnAdmin) | ## Folder networking | members | roles | |---|---| |gcp-network-admins
group|[roles/editor](https://cloud.google.com/iam/docs/understanding-roles#editor) | -|prod-resman-networking-0
serviceAccount|[roles/compute.xpnAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.xpnAdmin)
[roles/logging.admin](https://cloud.google.com/iam/docs/understanding-roles#logging.admin)
[roles/owner](https://cloud.google.com/iam/docs/understanding-roles#owner)
[roles/resourcemanager.folderAdmin](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.folderAdmin)
[roles/resourcemanager.projectCreator](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.projectCreator) | +|prod-resman-net-0
serviceAccount|[roles/compute.xpnAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.xpnAdmin)
[roles/logging.admin](https://cloud.google.com/iam/docs/understanding-roles#logging.admin)
[roles/owner](https://cloud.google.com/iam/docs/understanding-roles#owner)
[roles/resourcemanager.folderAdmin](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.folderAdmin)
[roles/resourcemanager.projectCreator](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.projectCreator) | + +## Folder production + +| members | roles | +|---|---| +|prod-resman-pf-0
serviceAccount|[roles/compute.xpnAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.xpnAdmin) | ## Folder sandbox | members | roles | |---|---| -|dev-resman-sandbox-0
serviceAccount|[roles/logging.admin](https://cloud.google.com/iam/docs/understanding-roles#logging.admin)
[roles/owner](https://cloud.google.com/iam/docs/understanding-roles#owner)
[roles/resourcemanager.folderAdmin](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.folderAdmin)
[roles/resourcemanager.projectCreator](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.projectCreator) | +|dev-resman-sbox-0
serviceAccount|[roles/logging.admin](https://cloud.google.com/iam/docs/understanding-roles#logging.admin)
[roles/owner](https://cloud.google.com/iam/docs/understanding-roles#owner)
[roles/resourcemanager.folderAdmin](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.folderAdmin)
[roles/resourcemanager.projectCreator](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.projectCreator) | ## Folder security | members | roles | |---|---| |gcp-security-admins
group|[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) | -|prod-resman-security-0
serviceAccount|[roles/logging.admin](https://cloud.google.com/iam/docs/understanding-roles#logging.admin)
[roles/owner](https://cloud.google.com/iam/docs/understanding-roles#owner)
[roles/resourcemanager.folderAdmin](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.folderAdmin)
[roles/resourcemanager.projectCreator](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.projectCreator) | - -## Folder dev - -| members | roles | -|---|---| -|dev-resman-pf-0
serviceAccount|[roles/compute.xpnAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.xpnAdmin) | - -## Folder prod - -| members | roles | -|---|---| -|prod-resman-pf-0
serviceAccount|[roles/compute.xpnAdmin](https://cloud.google.com/iam/docs/understanding-roles#compute.xpnAdmin) | +|prod-resman-sec-0
serviceAccount|[roles/logging.admin](https://cloud.google.com/iam/docs/understanding-roles#logging.admin)
[roles/owner](https://cloud.google.com/iam/docs/understanding-roles#owner)
[roles/resourcemanager.folderAdmin](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.folderAdmin)
[roles/resourcemanager.projectCreator](https://cloud.google.com/iam/docs/understanding-roles#resourcemanager.projectCreator) | diff --git a/fast/stages/01-resman/README.md b/fast/stages/01-resman/README.md index e39cf3d2..8ff41f7d 100644 --- a/fast/stages/01-resman/README.md +++ b/fast/stages/01-resman/README.md @@ -166,12 +166,12 @@ Due to its simplicity, this stage lends itself easily to customizations: adding | [automation_project_id](variables.tf#L29) | Project id for the automation project created by the bootstrap stage. | string | ✓ | | 00-bootstrap | | [billing_account](variables.tf#L20) | Billing account id and organization id ('nnnnnnnn' or null). | object({…}) | ✓ | | 00-bootstrap | | [organization](variables.tf#L57) | Organization details. | object({…}) | ✓ | | 00-bootstrap | -| [prefix](variables.tf#L81) | Prefix used for resources that need unique names. | string | ✓ | | 00-bootstrap | +| [prefix](variables.tf#L81) | Prefix used for resources that need unique names. Use 9 characters or less. | string | ✓ | | 00-bootstrap | | [custom_roles](variables.tf#L35) | Custom roles defined at the org level, in key => id format. | map(string) | | {} | 00-bootstrap | | [groups](variables.tf#L42) | Group names to grant organization-level permissions. | map(string) | | {…} | 00-bootstrap | | [organization_policy_configs](variables.tf#L67) | Organization policies customization. | object({…}) | | null | | | [outputs_location](variables.tf#L75) | Path where providers and tfvars files for the following stages are written. Leave empty to disable. | string | | null | | -| [team_folders](variables.tf#L87) | Team folders to be created. Format is described in a code comment. | map(object({…})) | | null | | +| [team_folders](variables.tf#L92) | Team folders to be created. Format is described in a code comment. 
| map(object({…})) | | null | | ## Outputs diff --git a/fast/stages/01-resman/branch-networking.tf b/fast/stages/01-resman/branch-networking.tf index bf712c4f..153aecb1 100644 --- a/fast/stages/01-resman/branch-networking.tf +++ b/fast/stages/01-resman/branch-networking.tf @@ -43,16 +43,16 @@ module "branch-network-folder" { module "branch-network-sa" { source = "../../../modules/iam-service-account" project_id = var.automation_project_id - name = "resman-networking-0" + name = "prod-resman-net-0" description = "Terraform resman networking service account." - prefix = local.prefixes.prod + prefix = var.prefix } module "branch-network-gcs" { source = "../../../modules/gcs" project_id = var.automation_project_id - name = "resman-networking-0" - prefix = local.prefixes.prod + name = "prod-resman-net-0" + prefix = var.prefix versioning = true iam = { "roles/storage.objectAdmin" = [module.branch-network-sa.iam_email] diff --git a/fast/stages/01-resman/branch-sandbox.tf b/fast/stages/01-resman/branch-sandbox.tf index e40aa3fe..0e145b6b 100644 --- a/fast/stages/01-resman/branch-sandbox.tf +++ b/fast/stages/01-resman/branch-sandbox.tf @@ -42,8 +42,8 @@ module "branch-sandbox-folder" { module "branch-sandbox-gcs" { source = "../../../modules/gcs" project_id = var.automation_project_id - name = "resman-sandbox-0" - prefix = local.prefixes.dev + name = "dev-resman-sbox-0" + prefix = var.prefix versioning = true iam = { "roles/storage.objectAdmin" = [module.branch-sandbox-sa.iam_email] @@ -53,7 +53,7 @@ module "branch-sandbox-gcs" { module "branch-sandbox-sa" { source = "../../../modules/iam-service-account" project_id = var.automation_project_id - name = "resman-sandbox-0" + name = "dev-resman-sbox-0" description = "Terraform resman sandbox service account." 
- prefix = local.prefixes.dev + prefix = var.prefix } diff --git a/fast/stages/01-resman/branch-security.tf b/fast/stages/01-resman/branch-security.tf index 94f68ecd..33bd5de0 100644 --- a/fast/stages/01-resman/branch-security.tf +++ b/fast/stages/01-resman/branch-security.tf @@ -44,16 +44,16 @@ module "branch-security-folder" { module "branch-security-sa" { source = "../../../modules/iam-service-account" project_id = var.automation_project_id - name = "resman-security-0" + name = "prod-resman-sec-0" description = "Terraform resman security service account." - prefix = local.prefixes.prod + prefix = var.prefix } module "branch-security-gcs" { source = "../../../modules/gcs" project_id = var.automation_project_id - name = "resman-security-0" - prefix = local.prefixes.prod + name = "prod-resman-sec-0" + prefix = var.prefix versioning = true iam = { "roles/storage.objectAdmin" = [module.branch-security-sa.iam_email] diff --git a/fast/stages/01-resman/branch-teams.tf b/fast/stages/01-resman/branch-teams.tf index 408a34ce..41caeadc 100644 --- a/fast/stages/01-resman/branch-teams.tf +++ b/fast/stages/01-resman/branch-teams.tf @@ -27,9 +27,9 @@ module "branch-teams-folder" { module "branch-teams-prod-sa" { source = "../../../modules/iam-service-account" project_id = var.automation_project_id - name = "resman-teams-0" + name = "prod-resman-teams-0" description = "Terraform resman production service account." - prefix = local.prefixes.prod + prefix = var.prefix } # Team-level folders, service accounts and buckets for each individual team @@ -46,9 +46,9 @@ module "branch-teams-team-sa" { source = "../../../modules/iam-service-account" for_each = coalesce(var.team_folders, {}) project_id = var.automation_project_id - name = "teams-${each.key}-0" + name = "prod-teams-${each.key}-0" description = "Terraform team ${each.key} service account." 
- prefix = local.prefixes.prod + prefix = var.prefix iam = { "roles/iam.serviceAccountTokenCreator" = ( each.value.impersonation_groups == null @@ -62,8 +62,8 @@ module "branch-teams-team-gcs" { source = "../../../modules/gcs" for_each = coalesce(var.team_folders, {}) project_id = var.automation_project_id - name = "teams-${each.key}-0" - prefix = local.prefixes.prod + name = "prod-teams-${each.key}-0" + prefix = var.prefix versioning = true iam = { "roles/storage.objectAdmin" = [module.branch-teams-team-sa[each.key].iam_email] @@ -103,17 +103,17 @@ module "branch-teams-team-dev-folder" { module "branch-teams-dev-projectfactory-sa" { source = "../../../modules/iam-service-account" project_id = var.automation_project_id - name = "resman-pf-0" + name = "dev-resman-pf-0" # naming: environment in description description = "Terraform project factory development service account." - prefix = local.prefixes.dev + prefix = var.prefix } module "branch-teams-dev-projectfactory-gcs" { source = "../../../modules/gcs" project_id = var.automation_project_id - name = "resman-pf-0" - prefix = local.prefixes.dev + name = "dev-resman-pf-0" + prefix = var.prefix versioning = true iam = { "roles/storage.objectAdmin" = [module.branch-teams-dev-projectfactory-sa.iam_email] @@ -153,17 +153,17 @@ module "branch-teams-team-prod-folder" { module "branch-teams-prod-projectfactory-sa" { source = "../../../modules/iam-service-account" project_id = var.automation_project_id - name = "resman-pf-0" + name = "prod-resman-pf-0" # naming: environment in description description = "Terraform project factory production service account." 
- prefix = local.prefixes.prod + prefix = var.prefix } module "branch-teams-prod-projectfactory-gcs" { source = "../../../modules/gcs" project_id = var.automation_project_id - name = "resman-pf-0" - prefix = local.prefixes.prod + name = "prod-resman-pf-0" + prefix = var.prefix versioning = true iam = { "roles/storage.objectAdmin" = [module.branch-teams-prod-projectfactory-sa.iam_email] diff --git a/fast/stages/01-resman/main.tf b/fast/stages/01-resman/main.tf index 2aedb7ce..9d12239e 100644 --- a/fast/stages/01-resman/main.tf +++ b/fast/stages/01-resman/main.tf @@ -27,9 +27,4 @@ locals { for k, v in local.groups : k => "group:${v}" } - # naming: environment names - prefixes = { - dev = "${var.prefix}-dev" - prod = "${var.prefix}-prod" - } } diff --git a/fast/stages/01-resman/organization.tf b/fast/stages/01-resman/organization.tf index c4e2282d..f19afef8 100644 --- a/fast/stages/01-resman/organization.tf +++ b/fast/stages/01-resman/organization.tf @@ -113,15 +113,13 @@ module "organization" { local.list_allow, { values = ["in:INTERNAL"] } ) "constraints/compute.vmExternalIpAccess" = local.list_deny - "constraints/iam.allowedPolicyMemberDomains" = { - inherit_from_parent = false - suggested_value = null - status = true - values = concat( - [var.organization.customer_id], - try(local.policy_configs.allowed_policy_member_domains, []) - ) - } + "constraints/iam.allowedPolicyMemberDomains" = merge( + local.list_allow, { + values = concat( + [var.organization.customer_id], + try(local.policy_configs.allowed_policy_member_domains, []) + ) + }) "constraints/run.allowedIngress" = merge( local.list_allow, { values = ["is:internal"] } ) diff --git a/fast/stages/01-resman/variables.tf b/fast/stages/01-resman/variables.tf index 3570441e..93398a2e 100644 --- a/fast/stages/01-resman/variables.tf +++ b/fast/stages/01-resman/variables.tf @@ -80,8 +80,13 @@ variable "outputs_location" { variable "prefix" { # tfdoc:variable:source 00-bootstrap - description = "Prefix used for 
resources that need unique names." + description = "Prefix used for resources that need unique names. Use 9 characters or less." type = string + + validation { + condition = try(length(var.prefix), 0) < 10 + error_message = "Use a maximum of 9 characters for prefix." + } } variable "team_folders" { diff --git a/fast/stages/02-networking-nva/README.md b/fast/stages/02-networking-nva/README.md index 2ff9056f..014a0736 100644 --- a/fast/stages/02-networking-nva/README.md +++ b/fast/stages/02-networking-nva/README.md @@ -366,17 +366,17 @@ Don't forget to add a peering zone in the landing project and point it to the ne | [billing_account_id](variables.tf#L17) | Billing account id. | string | ✓ | | 00-bootstrap | | [folder_ids](variables.tf#L59) | Folders to be used for the networking resources in folders/nnnnnnnnnnn format. If null, folder will be created. | map(string) | ✓ | | 01-resman | | [organization](variables.tf#L91) | Organization details. | object({…}) | ✓ | | 00-bootstrap | -| [prefix](variables.tf#L107) | Prefix used for resources that need unique names. | string | ✓ | | 00-bootstrap | +| [prefix](variables.tf#L107) | Prefix used for resources that need unique names. Use 9 characters or less. | string | ✓ | | 00-bootstrap | | [custom_adv](variables.tf#L23) | Custom advertisement definitions in name => range format. | map(string) | | {…} | | | [data_dir](variables.tf#L45) | Relative path for the folder storing configuration data for network resources. | string | | "data" | | | [dns](variables.tf#L51) | Onprem DNS resolvers | map(list(string)) | | {…} | | | [l7ilb_subnets](variables.tf#L65) | Subnets used for L7 ILBs. | map(list(object({…}))) | | {…} | | | [onprem_cidr](variables.tf#L83) | Onprem addresses in name => range format. | map(string) | | {…} | | | [outputs_location](variables.tf#L101) | Path where providers and tfvars files for the following stages are written. Leave empty to disable. 
| string | | null | | -| [project_factory_sa](variables.tf#L113) | IAM emails for project factory service accounts | map(string) | | {} | 01-resman | -| [psa_ranges](variables.tf#L120) | IP ranges used for Private Service Access (e.g. CloudSQL). | map(map(string)) | | {…} | | -| [router_configs](variables.tf#L139) | Configurations for CRs and onprem routers. | map(object({…})) | | {…} | | -| [vpn_onprem_configs](variables.tf#L162) | VPN gateway configuration for onprem interconnection. | map(object({…})) | | {…} | | +| [project_factory_sa](variables.tf#L118) | IAM emails for project factory service accounts | map(string) | | {} | 01-resman | +| [psa_ranges](variables.tf#L125) | IP ranges used for Private Service Access (e.g. CloudSQL). | map(map(string)) | | {…} | | +| [router_configs](variables.tf#L144) | Configurations for CRs and onprem routers. | map(object({…})) | | {…} | | +| [vpn_onprem_configs](variables.tf#L167) | VPN gateway configuration for onprem interconnection. | map(object({…})) | | {…} | | ## Outputs diff --git a/fast/stages/02-networking-nva/variables.tf b/fast/stages/02-networking-nva/variables.tf index 0659628d..8fca0ba7 100644 --- a/fast/stages/02-networking-nva/variables.tf +++ b/fast/stages/02-networking-nva/variables.tf @@ -106,8 +106,13 @@ variable "outputs_location" { variable "prefix" { # tfdoc:variable:source 00-bootstrap - description = "Prefix used for resources that need unique names." + description = "Prefix used for resources that need unique names. Use 9 characters or less." type = string + + validation { + condition = try(length(var.prefix), 0) < 10 + error_message = "Use a maximum of 9 characters for prefix." 
+ } } variable "project_factory_sa" { diff --git a/fast/stages/02-networking-vpn/README.md b/fast/stages/02-networking-vpn/README.md index afab994a..4b96eba8 100644 --- a/fast/stages/02-networking-vpn/README.md +++ b/fast/stages/02-networking-vpn/README.md @@ -311,18 +311,18 @@ DNS configurations are centralised in the `dns.tf` file. Spokes delegate DNS res | [billing_account_id](variables.tf#L17) | Billing account id. | string | ✓ | | 00-bootstrap | | [folder_ids](variables.tf#L61) | Folders to be used for the networking resources in folders/nnnnnnnnnnn format. If null, folder will be created. | map(string) | ✓ | | 01-resman | | [organization](variables.tf#L85) | Organization details. | object({…}) | ✓ | | 00-bootstrap | -| [prefix](variables.tf#L101) | Prefix used for resources that need unique names. | string | ✓ | | 00-bootstrap | +| [prefix](variables.tf#L101) | Prefix used for resources that need unique names. Use 9 characters or less. | string | ✓ | | 00-bootstrap | | [custom_adv](variables.tf#L23) | Custom advertisement definitions in name => range format. | map(string) | | {…} | | | [custom_roles](variables.tf#L40) | Custom roles defined at the org level, in key => id format. | map(string) | | {} | 00-bootstrap | | [data_dir](variables.tf#L47) | Relative path for the folder storing configuration data for network resources. | string | | "data" | | | [dns](variables.tf#L53) | Onprem DNS resolvers. | map(list(string)) | | {…} | | | [l7ilb_subnets](variables.tf#L67) | Subnets used for L7 ILBs. | map(list(object({…}))) | | {…} | | | [outputs_location](variables.tf#L95) | Path where providers and tfvars files for the following stages are written. Leave empty to disable. | string | | null | | -| [project_factory_sa](variables.tf#L107) | IAM emails for project factory service accounts. | map(string) | | {} | 01-resman | -| [psa_ranges](variables.tf#L114) | IP ranges used for Private Service Access (e.g. CloudSQL). 
| map(map(string)) | | {…} | | -| [router_configs](variables.tf#L129) | Configurations for CRs and onprem routers. | map(object({…})) | | {…} | | -| [vpn_onprem_configs](variables.tf#L153) | VPN gateway configuration for onprem interconnection. | map(object({…})) | | {…} | | -| [vpn_spoke_configs](variables.tf#L209) | VPN gateway configuration for spokes. | map(object({…})) | | {…} | | +| [project_factory_sa](variables.tf#L112) | IAM emails for project factory service accounts. | map(string) | | {} | 01-resman | +| [psa_ranges](variables.tf#L119) | IP ranges used for Private Service Access (e.g. CloudSQL). | map(map(string)) | | {…} | | +| [router_configs](variables.tf#L134) | Configurations for CRs and onprem routers. | map(object({…})) | | {…} | | +| [vpn_onprem_configs](variables.tf#L158) | VPN gateway configuration for onprem interconnection. | map(object({…})) | | {…} | | +| [vpn_spoke_configs](variables.tf#L214) | VPN gateway configuration for spokes. | map(object({…})) | | {…} | | ## Outputs diff --git a/fast/stages/02-networking-vpn/variables.tf b/fast/stages/02-networking-vpn/variables.tf index e67b839a..20fc21f3 100644 --- a/fast/stages/02-networking-vpn/variables.tf +++ b/fast/stages/02-networking-vpn/variables.tf @@ -107,8 +107,13 @@ variable "outputs_location" { variable "prefix" { # tfdoc:variable:source 00-bootstrap - description = "Prefix used for resources that need unique names." + description = "Prefix used for resources that need unique names. Use 9 characters or less." type = string + + validation { + condition = try(length(var.prefix), 0) < 10 + error_message = "Use a maximum of 9 characters for prefix." 
+ } } variable "project_factory_sa" { diff --git a/fast/stages/02-security/README.md b/fast/stages/02-security/README.md index 621db3e9..613b1c49 100644 --- a/fast/stages/02-security/README.md +++ b/fast/stages/02-security/README.md @@ -288,19 +288,19 @@ Some references that might be useful in setting up this stage: | [billing_account_id](variables.tf#L17) | Billing account id. | string | ✓ | | bootstrap | | [folder_id](variables.tf#L23) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ | | resman | | [organization](variables.tf#L73) | Organization details. | object({…}) | ✓ | | bootstrap | -| [prefix](variables.tf#L89) | Prefix used for resources that need unique names. | string | ✓ | | | +| [prefix](variables.tf#L89) | Prefix used for resources that need unique names. Use 9 characters or less. | string | ✓ | | 00-bootstrap | | [groups](variables.tf#L29) | Group names to grant organization-level permissions. | map(string) | | {…} | bootstrap | | [kms_defaults](variables.tf#L44) | Defaults used for KMS keys. | object({…}) | | {…} | | | [kms_keys](variables.tf#L56) | KMS keys to create, keyed by name. Null attributes will be interpolated with defaults. | map(object({…})) | | {} | | | [kms_restricted_admins](variables.tf#L67) | Map of environment => [identities] who can assign the encrypt/decrypt roles on keys. | map(list(string)) | | {} | | | [outputs_location](variables.tf#L83) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string | | null | | -| [vpc_sc_access_levels](variables.tf#L94) | VPC SC access level definitions. | map(object({…})) | | {} | | -| [vpc_sc_egress_policies](variables.tf#L109) | VPC SC egress policy defnitions. | map(object({…})) | | {} | | -| [vpc_sc_ingress_policies](variables.tf#L127) | VPC SC ingress policy defnitions. | map(object({…})) | | {} | | -| [vpc_sc_perimeter_access_levels](variables.tf#L147) | VPC SC perimeter access_levels. 
| object({…}) | | null | | -| [vpc_sc_perimeter_egress_policies](variables.tf#L157) | VPC SC egress policies per perimeter, values reference keys defined in the `vpc_sc_ingress_policies` variable. | object({…}) | | null | | -| [vpc_sc_perimeter_ingress_policies](variables.tf#L167) | VPC SC ingress policies per perimeter, values reference keys defined in the `vpc_sc_ingress_policies` variable. | object({…}) | | null | | -| [vpc_sc_perimeter_projects](variables.tf#L177) | VPC SC perimeter resources. | object({…}) | | null | | +| [vpc_sc_access_levels](variables.tf#L100) | VPC SC access level definitions. | map(object({…})) | | {} | | +| [vpc_sc_egress_policies](variables.tf#L115) | VPC SC egress policy defnitions. | map(object({…})) | | {} | | +| [vpc_sc_ingress_policies](variables.tf#L133) | VPC SC ingress policy defnitions. | map(object({…})) | | {} | | +| [vpc_sc_perimeter_access_levels](variables.tf#L153) | VPC SC perimeter access_levels. | object({…}) | | null | | +| [vpc_sc_perimeter_egress_policies](variables.tf#L163) | VPC SC egress policies per perimeter, values reference keys defined in the `vpc_sc_ingress_policies` variable. | object({…}) | | null | | +| [vpc_sc_perimeter_ingress_policies](variables.tf#L173) | VPC SC ingress policies per perimeter, values reference keys defined in the `vpc_sc_ingress_policies` variable. | object({…}) | | null | | +| [vpc_sc_perimeter_projects](variables.tf#L183) | VPC SC perimeter resources. | object({…}) | | null | | ## Outputs diff --git a/fast/stages/02-security/variables.tf b/fast/stages/02-security/variables.tf index 0829f098..00cbe4dc 100644 --- a/fast/stages/02-security/variables.tf +++ b/fast/stages/02-security/variables.tf @@ -87,8 +87,14 @@ variable "outputs_location" { } variable "prefix" { - description = "Prefix used for resources that need unique names." + # tfdoc:variable:source 00-bootstrap + description = "Prefix used for resources that need unique names. Use 9 characters or less." 
type = string + + validation { + condition = try(length(var.prefix), 0) < 10 + error_message = "Use a maximum of 9 characters for prefix." + } } variable "vpc_sc_access_levels" { diff --git a/fast/stages/02-security/vpc-sc.tf b/fast/stages/02-security/vpc-sc.tf index 49611e0f..e9887d20 100644 --- a/fast/stages/02-security/vpc-sc.tf +++ b/fast/stages/02-security/vpc-sc.tf @@ -116,8 +116,11 @@ locals { for k in local._perimeter_names : k => [] } : { - for k, v in local._perimeter_names : - k => v == null ? [] : v + for k in local._perimeter_names : k => ( + var.vpc_sc_perimeter_projects[k] == null + ? [] + : var.vpc_sc_perimeter_projects[k] + ) } ) # get the list of restricted services from the yaml file diff --git a/fast/stages/03-project-factory/prod/README.md b/fast/stages/03-project-factory/prod/README.md index 7c2d3d68..c784342c 100644 --- a/fast/stages/03-project-factory/prod/README.md +++ b/fast/stages/03-project-factory/prod/README.md @@ -108,12 +108,12 @@ terraform apply | name | description | type | required | default | producer | |---|---|:---:|:---:|:---:|:---:| | [billing_account_id](variables.tf#L19) | Billing account id. | string | ✓ | | 00-bootstrap | -| [prefix](variables.tf#L44) | Prefix used for resources that need unique names. | string | ✓ | | 00-bootstrap | +| [prefix](variables.tf#L44) | Prefix used for resources that need unique names. Use 9 characters or less. | string | ✓ | | 00-bootstrap | | [data_dir](variables.tf#L25) | Relative path for the folder storing configuration data. | string | | "data/projects" | | | [defaults_file](variables.tf#L38) | Relative path for the file storing the project factory configuration. | string | | "data/defaults.yaml" | | | [environment_dns_zone](variables.tf#L31) | DNS zone suffix for environment. | string | | null | 02-networking | -| [shared_vpc_self_link](variables.tf#L50) | Self link for the shared VPC. 
| string | | null | 02-networking | -| [vpc_host_project](variables.tf#L57) | Host project for the shared VPC. | string | | null | 02-networking | +| [shared_vpc_self_link](variables.tf#L55) | Self link for the shared VPC. | string | | null | 02-networking | +| [vpc_host_project](variables.tf#L62) | Host project for the shared VPC. | string | | null | 02-networking | ## Outputs diff --git a/fast/stages/03-project-factory/prod/variables.tf b/fast/stages/03-project-factory/prod/variables.tf index 2e2b2c95..a580260c 100644 --- a/fast/stages/03-project-factory/prod/variables.tf +++ b/fast/stages/03-project-factory/prod/variables.tf @@ -43,8 +43,13 @@ variable "defaults_file" { variable "prefix" { # tfdoc:variable:source 00-bootstrap - description = "Prefix used for resources that need unique names." + description = "Prefix used for resources that need unique names. Use 9 characters or less." type = string + + validation { + condition = try(length(var.prefix), 0) < 10 + error_message = "Use a maximum of 9 characters for prefix." + } } variable "shared_vpc_self_link" { diff --git a/modules/dns/README.md b/modules/dns/README.md index d1a17aee..bd40d7c7 100644 --- a/modules/dns/README.md +++ b/modules/dns/README.md @@ -17,7 +17,7 @@ module "private-dns" { domain = "test.example." 
client_networks = [var.vpc.self_link] recordsets = { - "A localhost" = { type = "A", ttl = 300, records = ["127.0.0.1"] } + "A localhost" = { ttl = 300, records = ["127.0.0.1"] } } } # tftest modules=1 resources=2 diff --git a/modules/vpc-sc/README.md b/modules/vpc-sc/README.md index 0489abd3..98381bae 100644 --- a/modules/vpc-sc/README.md +++ b/modules/vpc-sc/README.md @@ -15,7 +15,7 @@ By default, the module is configured to use an existing policy, passed in by nam ```hcl module "test" { source = "./modules/vpc-sc" - access_policy = "accessPolicies/12345678" + access_policy = "12345678" } # tftest modules=0 resources=0 ``` diff --git a/tests/examples/data_solutions/data_platform_foundations/test_plan.py b/tests/examples/data_solutions/data_platform_foundations/test_plan.py index e8a70a83..b000b126 100644 --- a/tests/examples/data_solutions/data_platform_foundations/test_plan.py +++ b/tests/examples/data_solutions/data_platform_foundations/test_plan.py @@ -24,4 +24,4 @@ def test_resources(e2e_plan_runner): "Test that plan works and the numbers of resources is as expected." modules, resources = e2e_plan_runner(FIXTURES_DIR) assert len(modules) == 40 - assert len(resources) == 287 + assert len(resources) == 282 diff --git a/tools/check_names.py b/tools/check_names.py new file mode 100755 index 00000000..c487d693 --- /dev/null +++ b/tools/check_names.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +'Parse names from specific Terraform resources and optionally check length.' + +import collections +import enum +import logging +import pathlib +import re + +import click + + +BASEDIR = pathlib.Path(__file__).resolve().parents[1] +LOGGER = logging.getLogger() +MOD_TOKENS = [ + ('NAME', r'\s*module\s*"([^"]+)"\s*\{\s*'), + ('SOURCE', r'\s*source\s*=\s*"([^"]+)"\s*'), + ('VALUE', r'\s*name\s*=\s*"([^"]+)"\s*'), + ('REST', r'(.*)') +] +MOD = enum.Enum('MOD', ' '.join(name for name, _ in MOD_TOKENS)) +MOD_RE = re.compile('|'.join(f'(?:{pattern})' for _, pattern in MOD_TOKENS)) +MOD_LIMITS = { + 'project': 30, 'iam-service-account': 30, 'gcs': 63 +} + +Name = collections.namedtuple('Name', 'source name value length') + + +def get_names(dir_name): + dir_path = BASEDIR / dir_name + for tf_path in sorted(dir_path.glob('**/*.tf')): + if '.terraform' in str(tf_path): + continue + LOGGER.debug(f'file {tf_path}') + doc = tf_path.read_text() + name = source = None + for m in MOD_RE.finditer(doc): + token_type = MOD(m.lastindex) + if token_type == MOD.REST: + continue + value = m.group(m.lastindex).strip() + LOGGER.debug(f'{token_type}: {value}') + if token_type == MOD.NAME: + if name: + LOGGER.debug(f'module {name} already open ({value})') + name = value + source = None + elif token_type == MOD.SOURCE: + source = value.split('/')[-1] + LOGGER.debug(f'{name} {source}') + elif token_type == MOD.VALUE: + if name is None or source not in MOD_LIMITS: + continue + if '$' in value: + LOGGER.debug(f'interpolation in {name} ({value}), skipping') + else: + yield Name(source, name, value, len(value)) + name = source = None + + +@click.command() +@click.argument('dirs', type=str, nargs=-1) +@click.option('--prefix-length', default=7, type=int) +def main(dirs, prefix_length=None): + 'Parse names in dirs.' 
+ import json + logging.basicConfig(level=logging.INFO) + names = [] + for dir_name in dirs: + for name in get_names(dir_name): + names.append(name) + names.sort() + source_just = max(len(k) for k in MOD_LIMITS) + name_just = max(len(n.name) for n in names) + value_just = max(len(n.value) for n in names) + for name in names: + name_length = name.length + prefix_length + flag = '✗' if name_length >= MOD_LIMITS[name.source] else '✓' + print(( + f'[{flag}] {name.source.ljust(source_just)} ' + f'{name.name.ljust(name_just)} ' + f'{name.value.ljust(value_just)} ' + f'({name_length})' + )) + + +if __name__ == '__main__': + main()