diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f4763f2..ddd8b0c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ All notable changes to this project will be documented in this file. ### BLUEPRINTS +- [[#1081](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1081)] Apigee hybrid on GKE ([apichick](https://github.com/apichick)) +- [[#1082](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1082)] Fixes in Apigee Bigquery Analytics blueprint ([apichick](https://github.com/apichick)) - [[#1071](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1071)] Moved apigee bigquery analytics blueprint, added apigee network patterns ([apichick](https://github.com/apichick)) - [[#1073](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1073)] Allow setting no ranges in firewall module custom rules ([ludoo](https://github.com/ludoo)) - [[#1072](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1072)] **incompatible change:** Add gc_policy to Bigtable module, bump provider versions to 4.47 ([iht](https://github.com/iht)) @@ -18,17 +20,25 @@ All notable changes to this project will be documented in this file. 
### DOCUMENTATION +- [[#1084](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1084)] Fixes in Apigee blueprints README files ([apichick](https://github.com/apichick)) +- [[#1081](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1081)] Apigee hybrid on GKE ([apichick](https://github.com/apichick)) +- [[#1074](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1074)] Adding new section for Authentication issues ([agutta](https://github.com/agutta)) - [[#1071](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1071)] Moved apigee bigquery analytics blueprint, added apigee network patterns ([apichick](https://github.com/apichick)) - [[#1057](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1057)] Adding new file FAQ and an image ([agutta](https://github.com/agutta)) ### FAST +- [[#1085](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1085)] fix restricted services not being added to the perimeter configurations ([drebes](https://github.com/drebes)) - [[#1057](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1057)] Adding new file FAQ and an image ([agutta](https://github.com/agutta)) - [[#1054](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1054)] FAST: fix typo in bootstrap stage README ([agutta](https://github.com/agutta)) - [[#1051](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1051)] FAST: add instructions for billing export to stage 0 README ([KPRepos](https://github.com/KPRepos)) ### MODULES +- [[#1078](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1078)] Fixed delete_rule in compute-mig module for stateful disks ([rosmo](https://github.com/rosmo)) +- [[#1080](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1080)] Added device_name field to compute-vm attached_disks parameter ([rosmo](https://github.com/rosmo)) +- 
[[#1079](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1079)] Reorder org policy rules ([juliocc](https://github.com/juliocc)) +- [[#1075](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1075)] **incompatible change:** Add cluster replicas to Bigtable module. ([iht](https://github.com/iht)) - [[#1073](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1073)] Allow setting no ranges in firewall module custom rules ([ludoo](https://github.com/ludoo)) - [[#1072](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1072)] **incompatible change:** Add gc_policy to Bigtable module, bump provider versions to 4.47 ([iht](https://github.com/iht)) - [[#1070](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1070)] Fix MIG health check variable ([ludoo](https://github.com/ludoo)) @@ -43,6 +53,7 @@ All notable changes to this project will be documented in this file. ### TOOLS +- [[#1091](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1091)] Fix check_documentation output ([juliocc](https://github.com/juliocc)) - [[#1053](https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/pull/1053)] Extend inventory-based testing to examples ([juliocc](https://github.com/juliocc)) ## [19.0.0] - 2022-12-13 diff --git a/blueprints/data-solutions/data-platform-foundations/03-composer.tf b/blueprints/data-solutions/data-platform-foundations/03-composer.tf index 2622ffa2..33a21408 100644 --- a/blueprints/data-solutions/data-platform-foundations/03-composer.tf +++ b/blueprints/data-solutions/data-platform-foundations/03-composer.tf @@ -14,6 +14,40 @@ # tfdoc:file:description Orchestration Cloud Composer definition. 
+locals { + env_variables = { + BQ_LOCATION = var.location + DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}") + DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "") + DRP_PRJ = module.drop-project.project_id + DRP_BQ = module.drop-bq-0.dataset_id + DRP_GCS = module.drop-cs-0.url + DRP_PS = module.drop-ps-0.id + DWH_LAND_PRJ = module.dwh-lnd-project.project_id + DWH_LAND_BQ_DATASET = module.dwh-lnd-bq-0.dataset_id + DWH_LAND_GCS = module.dwh-lnd-cs-0.url + DWH_CURATED_PRJ = module.dwh-cur-project.project_id + DWH_CURATED_BQ_DATASET = module.dwh-cur-bq-0.dataset_id + DWH_CURATED_GCS = module.dwh-cur-cs-0.url + DWH_CONFIDENTIAL_PRJ = module.dwh-conf-project.project_id + DWH_CONFIDENTIAL_BQ_DATASET = module.dwh-conf-bq-0.dataset_id + DWH_CONFIDENTIAL_GCS = module.dwh-conf-cs-0.url + GCP_REGION = var.region + LOD_PRJ = module.load-project.project_id + LOD_GCS_STAGING = module.load-cs-df-0.url + LOD_NET_VPC = local.load_vpc + LOD_NET_SUBNET = local.load_subnet + LOD_SA_DF = module.load-sa-df-0.email + ORC_PRJ = module.orch-project.project_id + ORC_GCS = module.orch-cs-0.url + TRF_PRJ = module.transf-project.project_id + TRF_GCS_STAGING = module.transf-cs-df-0.url + TRF_NET_VPC = local.transf_vpc + TRF_NET_SUBNET = local.transf_subnet + TRF_SA_DF = module.transf-sa-df-0.email + TRF_SA_BQ = module.transf-sa-bq-0.email + } +} module "orch-sa-cmp-0" { source = "../../../modules/iam-service-account" project_id = module.orch-project.project_id @@ -27,21 +61,51 @@ module "orch-sa-cmp-0" { } resource "google_composer_environment" "orch-cmp-0" { - provider = google-beta - project = module.orch-project.project_id - name = "${var.prefix}-orc-cmp-0" - region = var.region + count = var.composer_config.disable_deployment == true ? 
0 : 1 + project = module.orch-project.project_id + name = "${var.prefix}-orc-cmp-0" + region = var.region config { - node_count = var.composer_config.node_count + software_config { + airflow_config_overrides = try(var.composer_config.software_config.airflow_config_overrides, null) + pypi_packages = try(var.composer_config.software_config.pypi_packages, null) + env_variables = merge(try(var.composer_config.software_config.env_variables, null), local.env_variables) + image_version = try(var.composer_config.software_config.image_version, null) + } + dynamic "workloads_config" { + for_each = (try(var.composer_config.workloads_config, null) != null ? { 1 = 1 } : {}) + + content { + scheduler { + cpu = try(var.composer_config.workloads_config.scheduler.cpu, null) + memory_gb = try(var.composer_config.workloads_config.scheduler.memory_gb, null) + storage_gb = try(var.composer_config.workloads_config.scheduler.storage_gb, null) + count = try(var.composer_config.workloads_config.scheduler.count, null) + } + web_server { + cpu = try(var.composer_config.workloads_config.web_server.cpu, null) + memory_gb = try(var.composer_config.workloads_config.web_server.memory_gb, null) + storage_gb = try(var.composer_config.workloads_config.web_server.storage_gb, null) + } + worker { + cpu = try(var.composer_config.workloads_config.worker.cpu, null) + memory_gb = try(var.composer_config.workloads_config.worker.memory_gb, null) + storage_gb = try(var.composer_config.workloads_config.worker.storage_gb, null) + min_count = try(var.composer_config.workloads_config.worker.min_count, null) + max_count = try(var.composer_config.workloads_config.worker.max_count, null) + } + } + } + + environment_size = var.composer_config.environment_size + node_config { - zone = "${var.region}-b" - service_account = module.orch-sa-cmp-0.email network = local.orch_vpc subnetwork = local.orch_subnet - tags = ["composer-worker", "http-server", "https-server"] - enable_ip_masq_agent = true + service_account = 
module.orch-sa-cmp-0.email + enable_ip_masq_agent = "true" + tags = ["composer-worker"] ip_allocation_policy { - use_ip_aliases = "true" cluster_secondary_range_name = try( var.network_config.composer_secondary_ranges.pods, "pods" ) @@ -58,80 +122,20 @@ resource "google_composer_environment" "orch-cmp-0" { master_ipv4_cidr_block = try( var.network_config.composer_ip_ranges.gke_master, "10.20.11.0/28" ) - web_server_ipv4_cidr_block = try( - var.network_config.composer_ip_ranges.web_server, "10.20.11.16/28" - ) } - software_config { - image_version = var.composer_config.airflow_version - env_variables = merge( - var.composer_config.env_variables, { - BQ_LOCATION = var.location - DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}") - DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "") - DRP_PRJ = module.drop-project.project_id - DRP_BQ = module.drop-bq-0.dataset_id - DRP_GCS = module.drop-cs-0.url - DRP_PS = module.drop-ps-0.id - DWH_LAND_PRJ = module.dwh-lnd-project.project_id - DWH_LAND_BQ_DATASET = module.dwh-lnd-bq-0.dataset_id - DWH_LAND_GCS = module.dwh-lnd-cs-0.url - DWH_CURATED_PRJ = module.dwh-cur-project.project_id - DWH_CURATED_BQ_DATASET = module.dwh-cur-bq-0.dataset_id - DWH_CURATED_GCS = module.dwh-cur-cs-0.url - DWH_CONFIDENTIAL_PRJ = module.dwh-conf-project.project_id - DWH_CONFIDENTIAL_BQ_DATASET = module.dwh-conf-bq-0.dataset_id - DWH_CONFIDENTIAL_GCS = module.dwh-conf-cs-0.url - DWH_PLG_PRJ = module.dwh-plg-project.project_id - DWH_PLG_BQ_DATASET = module.dwh-plg-bq-0.dataset_id - DWH_PLG_GCS = module.dwh-plg-cs-0.url - GCP_REGION = var.region - LOD_PRJ = module.load-project.project_id - LOD_GCS_STAGING = module.load-cs-df-0.url - LOD_NET_VPC = local.load_vpc - LOD_NET_SUBNET = local.load_subnet - LOD_SA_DF = module.load-sa-df-0.email - ORC_PRJ = module.orch-project.project_id - ORC_GCS = module.orch-cs-0.url - TRF_PRJ = module.transf-project.project_id - TRF_GCS_STAGING = module.transf-cs-df-0.url - TRF_NET_VPC = 
local.transf_vpc - TRF_NET_SUBNET = local.transf_subnet - TRF_SA_DF = module.transf-sa-df-0.email - TRF_SA_BQ = module.transf-sa-bq-0.email - } - ) - } - dynamic "encryption_config" { for_each = ( - try(local.service_encryption_keys.composer != null, false) + try(var.service_encryption_keys[var.region], null) != null ? { 1 = 1 } : {} ) content { - kms_key_name = try(local.service_encryption_keys.composer, null) + kms_key_name = try(var.service_encryption_keys[var.region], null) } } - - # dynamic "web_server_network_access_control" { - # for_each = toset( - # var.network_config.web_server_network_access_control == null - # ? [] - # : [var.network_config.web_server_network_access_control] - # ) - # content { - # dynamic "allowed_ip_range" { - # for_each = toset(web_server_network_access_control.key) - # content { - # value = allowed_ip_range.key - # } - # } - # } - # } - } depends_on = [ google_project_iam_member.shared_vpc, + module.orch-project ] } diff --git a/blueprints/data-solutions/data-platform-foundations/03-orchestration.tf b/blueprints/data-solutions/data-platform-foundations/03-orchestration.tf index 2974c122..8e2d0725 100644 --- a/blueprints/data-solutions/data-platform-foundations/03-orchestration.tf +++ b/blueprints/data-solutions/data-platform-foundations/03-orchestration.tf @@ -54,6 +54,9 @@ module "orch-project" { "roles/bigquery.jobUser" = [ module.orch-sa-cmp-0.iam_email, ] + "roles/composer.ServiceAgentV2Ext" = [ + "serviceAccount:${module.orch-project.service_accounts.robots.composer}" + ] "roles/composer.worker" = [ module.orch-sa-cmp-0.iam_email ] @@ -67,11 +70,6 @@ module "orch-project" { "roles/storage.objectViewer" = [module.load-sa-df-0.iam_email] } oslogin = false - org_policies = { - "constraints/compute.requireOsLogin" = { - enforce = false - } - } services = concat(var.project_services, [ "artifactregistry.googleapis.com", "bigquery.googleapis.com", diff --git a/blueprints/data-solutions/data-platform-foundations/05-datawarehouse.tf 
b/blueprints/data-solutions/data-platform-foundations/05-datawarehouse.tf index 879a0e0b..0db5ce44 100644 --- a/blueprints/data-solutions/data-platform-foundations/05-datawarehouse.tf +++ b/blueprints/data-solutions/data-platform-foundations/05-datawarehouse.tf @@ -30,21 +30,6 @@ locals { "roles/storage.objectViewer", ] } - dwh_plg_group_iam = { - (local.groups.data-engineers) = [ - "roles/bigquery.dataEditor", - "roles/storage.admin", - ], - (local.groups.data-analysts) = [ - "roles/bigquery.dataEditor", - "roles/bigquery.jobUser", - "roles/bigquery.metadataViewer", - "roles/bigquery.user", - "roles/datacatalog.viewer", - "roles/datacatalog.tagTemplateViewer", - "roles/storage.objectAdmin", - ] - } dwh_lnd_iam = { "roles/bigquery.dataOwner" = [ module.load-sa-df-0.iam_email, @@ -140,21 +125,6 @@ module "dwh-conf-project" { } } -module "dwh-plg-project" { - source = "../../../modules/project" - parent = var.folder_id - billing_account = var.billing_account_id - prefix = var.prefix - name = "dwh-plg${local.project_suffix}" - group_iam = local.dwh_plg_group_iam - iam = {} - services = local.dwh_services - service_encryption_key_ids = { - bq = [try(local.service_encryption_keys.bq, null)] - storage = [try(local.service_encryption_keys.storage, null)] - } -} - # Bigquery module "dwh-lnd-bq-0" { @@ -181,14 +151,6 @@ module "dwh-conf-bq-0" { encryption_key = try(local.service_encryption_keys.bq, null) } -module "dwh-plg-bq-0" { - source = "../../../modules/bigquery-dataset" - project_id = module.dwh-plg-project.project_id - id = "${replace(var.prefix, "-", "_")}_dwh_plg_bq_0" - location = var.location - encryption_key = try(local.service_encryption_keys.bq, null) -} - # Cloud storage module "dwh-lnd-cs-0" { @@ -223,14 +185,3 @@ module "dwh-conf-cs-0" { encryption_key = try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy } - -module "dwh-plg-cs-0" { - source = "../../../modules/gcs" - project_id = module.dwh-plg-project.project_id - 
prefix = var.prefix - name = "dwh-plg-cs-0" - location = var.location - storage_class = "MULTI_REGIONAL" - encryption_key = try(local.service_encryption_keys.storage, null) - force_destroy = var.data_force_destroy -} diff --git a/blueprints/data-solutions/data-platform-foundations/IAM.md b/blueprints/data-solutions/data-platform-foundations/IAM.md index 54d35939..5a1995da 100644 --- a/blueprints/data-solutions/data-platform-foundations/IAM.md +++ b/blueprints/data-solutions/data-platform-foundations/IAM.md @@ -57,14 +57,6 @@ Legend: + additive, conditional. |trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | |trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) | -## Project dwh-plg - -| members | roles | -|---|---| -|gcp-data-analysts
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | -|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| - ## Project lod | members | roles | @@ -80,7 +72,7 @@ Legend: + additive, conditional. | members | roles | |---|---| |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor)
[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)
[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | -|SERVICE_IDENTITY_cloudcomposer-accounts
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_cloudcomposer-accounts
serviceAccount|[roles/composer.ServiceAgentV2Ext](https://cloud.google.com/iam/docs/understanding-roles#composer.ServiceAgentV2Ext)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |load-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |orc-cmp-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | diff --git a/blueprints/data-solutions/data-platform-foundations/README.md b/blueprints/data-solutions/data-platform-foundations/README.md index 8da143b2..ed28eba6 100644 --- a/blueprints/data-solutions/data-platform-foundations/README.md +++ b/blueprints/data-solutions/data-platform-foundations/README.md @@ -21,7 +21,7 @@ The approach adapts to different high-level requirements: - least privilege principle - rely on service account impersonation -The code in this blueprint doesn't address Organization-level configurations (Organization policy, VPC-SC, centralized logs). We expect those elements to be managed by automation stages external to this script like those in [FAST](../../../fast). +The code in this blueprint doesn't address Organization-level configurations (Organization policy, VPC-SC, centralized logs). We expect those elements to be managed by automation stages external to this script like those in [FAST](../../../fast) and this blueprint deployed on top of them as one of the [stages](../../../fast/stages/03-data-platform/dev/README.md). ### Project structure @@ -39,14 +39,13 @@ This separation into projects allows adhering to the least-privilege principle b The script will create the following projects: - **Drop off** Used to store temporary data. Data is pushed to Cloud Storage, BigQuery, or Cloud PubSub. Resources are configured with a customizable lifecycle policy. -- **Load** Used to load data from the drop off zone to the data warehouse. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended. 
+- **Load** Used to load data from the drop off zone to the data warehouse. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended. When you need to handle workloads from different teams, if strong role separation is needed between them, we suggest to customize the script and have separate `Load` projects. - **Data Warehouse** Several projects distributed across 3 separate layers, to host progressively processed and refined data: - **Landing - Raw data** Structured Data, stored in relevant formats: structured data stored in BigQuery, unstructured data stored on Cloud Storage with additional metadata stored in BigQuery (for example pictures stored in Cloud Storage and analysis of the images for Cloud Vision API stored in BigQuery). - **Curated - Cleansed, aggregated and curated data** - **Confidential - Curated and unencrypted layer** - - **Playground** Temporary tables that Data Analyst may use to perform R&D on data available in other Data Warehouse layers. - **Orchestration** Used to host Cloud Composer, which orchestrates all tasks that move data across layers. -- **Transformation** Used to move data between Data Warehouse layers. We strongly suggest relying on BigQuery Engine to perform the transformations. If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII. +- **Transformation** Used to move data between Data Warehouse layers. We strongly suggest relying on BigQuery Engine to perform the transformations. 
If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII. When you need to handle workloads from different teams, if strong role separation is needed between them, we suggest to customize the script and have separate `Transformation` projects. - **Exposure** Used to host resources that share processed data with external systems. Depending on the access pattern, data can be presented via Cloud SQL, BigQuery, or Bigtable. For BigQuery data, we strongly suggest relying on [Authorized views](https://cloud.google.com/bigquery/docs/authorized-views). ### Roles @@ -80,10 +79,10 @@ We use three groups to control access to resources: The table below shows a high level overview of roles for each group on each project, using `READ`, `WRITE` and `ADMIN` access patterns for simplicity. For detailed roles please refer to the code. -|Group|Drop off|Load|Transformation|DHW Landing|DWH Curated|DWH Confidential|DWH Playground|Orchestration|Common| +|Group|Drop off|Load|Transformation|DHW Landing|DWH Curated|DWH Confidential|Orchestration|Common| |-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| -|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`| -|Data Analysts|-|-|-|-|-|`READ`|`READ`/`WRITE`|-|-| +|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`| +|Data Analysts|-|-|-|-|-|`READ`|-|-| |Data Security|-|-|-|-|-|-|-|-|`ADMIN`| You can configure groups via the `groups` variable. @@ -109,14 +108,13 @@ In both VPC scenarios, you also need these ranges for Composer: - one /24 for Cloud SQL - one /28 for the GKE control plane -- one /28 for the web server ### Resource naming conventions Resources follow the naming convention described below. 
- `prefix-layer` for projects -- `prefix-layer-prduct` for resources +- `prefix-layer-product` for resources - `prefix-layer[2]-gcp-product[2]-counter` for services and service accounts ### Encryption @@ -221,7 +219,7 @@ module "data-platform" { prefix = "myprefix" } -# tftest modules=42 resources=316 +# tftest modules=39 resources=286 ``` ## Customizations @@ -247,31 +245,31 @@ You can find examples in the `[demo](./demo)` folder. | name | description | type | required | default | |---|---|:---:|:---:|:---:| | [billing_account_id](variables.tf#L17) | Billing account id. | string | ✓ | | -| [folder_id](variables.tf#L53) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ | | -| [organization_domain](variables.tf#L98) | Organization domain. | string | ✓ | | -| [prefix](variables.tf#L103) | Prefix used for resource names. | string | ✓ | | -| [composer_config](variables.tf#L22) | Cloud Composer config. | object({…}) | | {…} | -| [data_catalog_tags](variables.tf#L36) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) | | {…} | -| [data_force_destroy](variables.tf#L47) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | -| [groups](variables.tf#L58) | User groups. | map(string) | | {…} | -| [location](variables.tf#L68) | Location used for multi-regional resources. | string | | "eu" | -| [network_config](variables.tf#L74) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | -| [project_services](variables.tf#L112) | List of core services enabled on all projects. | list(string) | | […] | -| [project_suffix](variables.tf#L123) | Suffix used only for project ids. | string | | null | -| [region](variables.tf#L129) | Region used for regional resources. 
| string | | "europe-west1" | -| [service_encryption_keys](variables.tf#L135) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | +| [folder_id](variables.tf#L122) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ | | +| [organization_domain](variables.tf#L166) | Organization domain. | string | ✓ | | +| [prefix](variables.tf#L171) | Prefix used for resource names. | string | ✓ | | +| [composer_config](variables.tf#L22) | Cloud Composer config. | object({…}) | | {…} | +| [data_catalog_tags](variables.tf#L105) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) | | {…} | +| [data_force_destroy](variables.tf#L116) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | +| [groups](variables.tf#L127) | User groups. | map(string) | | {…} | +| [location](variables.tf#L137) | Location used for multi-regional resources. | string | | "eu" | +| [network_config](variables.tf#L143) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | +| [project_services](variables.tf#L180) | List of core services enabled on all projects. | list(string) | | […] | +| [project_suffix](variables.tf#L191) | Suffix used only for project ids. | string | | null | +| [region](variables.tf#L197) | Region used for regional resources. | string | | "europe-west1" | +| [service_encryption_keys](variables.tf#L203) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | ## Outputs | name | description | sensitive | |---|---|:---:| | [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | | -| [demo_commands](outputs.tf#L28) | Demo commands. | | -| [gcs-buckets](outputs.tf#L41) | GCS buckets. 
| | -| [kms_keys](outputs.tf#L55) | Cloud MKS keys. | | -| [projects](outputs.tf#L60) | GCP Projects informations. | | -| [vpc_network](outputs.tf#L88) | VPC network. | | -| [vpc_subnet](outputs.tf#L97) | VPC subnetworks. | | +| [demo_commands](outputs.tf#L27) | Demo commands. Relevant only if Composer is deployed. | | +| [gcs-buckets](outputs.tf#L40) | GCS buckets. | | +| [kms_keys](outputs.tf#L53) | Cloud MKS keys. | | +| [projects](outputs.tf#L58) | GCP Projects informations. | | +| [vpc_network](outputs.tf#L84) | VPC network. | | +| [vpc_subnet](outputs.tf#L93) | VPC subnetworks. | | ## TODOs diff --git a/blueprints/data-solutions/data-platform-foundations/images/overview_diagram.png b/blueprints/data-solutions/data-platform-foundations/images/overview_diagram.png index 642c81c2..073ec870 100644 Binary files a/blueprints/data-solutions/data-platform-foundations/images/overview_diagram.png and b/blueprints/data-solutions/data-platform-foundations/images/overview_diagram.png differ diff --git a/blueprints/data-solutions/data-platform-foundations/outputs.tf b/blueprints/data-solutions/data-platform-foundations/outputs.tf index b941776c..2394fe09 100644 --- a/blueprints/data-solutions/data-platform-foundations/outputs.tf +++ b/blueprints/data-solutions/data-platform-foundations/outputs.tf @@ -21,17 +21,16 @@ output "bigquery-datasets" { dwh-landing-bq-0 = module.dwh-lnd-bq-0.dataset_id, dwh-curated-bq-0 = module.dwh-cur-bq-0.dataset_id, dwh-confidential-bq-0 = module.dwh-conf-bq-0.dataset_id, - dwh-plg-bq-0 = module.dwh-plg-bq-0.dataset_id, } } output "demo_commands" { - description = "Demo commands." + description = "Demo commands. Relevant only if Composer is deployed." 
value = { 01 = "gsutil -i ${module.drop-sa-cs-0.email} cp demo/data/*.csv gs://${module.drop-cs-0.name}" - 02 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/data/*.j* gs://${module.orch-cs-0.name}" - 03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/" - 04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG." + 02 = try("gsutil -i ${module.orch-sa-cmp-0.email} cp demo/data/*.j* gs://${module.orch-cs-0.name}", "Composer not deployed.") + 03 = try("gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0[0].config[0].dag_gcs_prefix}/", "Composer not deployed") + 04 = try("Open ${google_composer_environment.orch-cmp-0[0].config.0.airflow_uri} and run uploaded DAG.", "Composer not deployed") 05 = <object({…}) | ✓ | | 00-bootstrap | | [billing_account](variables.tf#L25) | Billing account id and organization id ('nnnnnnnn' or null). | object({…}) | ✓ | | 00-globals | -| [folder_ids](variables.tf#L65) | Folder to be used for the networking resources in folders/nnnn format. | object({…}) | ✓ | | 01-resman | -| [host_project_ids](variables.tf#L83) | Shared VPC project ids. | object({…}) | ✓ | | 02-networking | -| [organization](variables.tf#L115) | Organization details. | object({…}) | ✓ | | 00-globals | -| [prefix](variables.tf#L131) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | string | ✓ | | 00-globals | -| [composer_config](variables.tf#L34) | Cloud Composer configuration options. | object({…}) | | {…} | | -| [data_catalog_tags](variables.tf#L48) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) | | {…} | | -| [data_force_destroy](variables.tf#L59) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. 
| bool | | false | | -| [groups](variables.tf#L73) | Groups. | map(string) | | {…} | | -| [location](variables.tf#L91) | Location used for multi-regional resources. | string | | "eu" | | -| [network_config_composer](variables.tf#L97) | Network configurations to use for Composer. | object({…}) | | {…} | | -| [outputs_location](variables.tf#L125) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string | | null | | -| [project_services](variables.tf#L137) | List of core services enabled on all projects. | list(string) | | […] | | -| [region](variables.tf#L148) | Region used for regional resources. | string | | "europe-west1" | | -| [service_encryption_keys](variables.tf#L154) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | | -| [subnet_self_links](variables.tf#L166) | Shared VPC subnet self links. | object({…}) | | null | 02-networking | -| [vpc_self_links](variables.tf#L175) | Shared VPC self links. | object({…}) | | null | 02-networking | +| [folder_ids](variables.tf#L98) | Folder to be used for the networking resources in folders/nnnn format. | object({…}) | ✓ | | 01-resman | +| [host_project_ids](variables.tf#L116) | Shared VPC project ids. | object({…}) | ✓ | | 02-networking | +| [organization](variables.tf#L146) | Organization details. | object({…}) | ✓ | | 00-globals | +| [prefix](variables.tf#L162) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | string | ✓ | | 00-globals | +| [composer_config](variables.tf#L34) | Cloud Composer configuration options. | object({…}) | | {…} | | +| [data_catalog_tags](variables.tf#L81) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. 
| map(map(list(string))) | | {…} | | +| [data_force_destroy](variables.tf#L92) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | bool | | false | | +| [groups](variables.tf#L106) | Groups. | map(string) | | {…} | | +| [location](variables.tf#L124) | Location used for multi-regional resources. | string | | "eu" | | +| [network_config_composer](variables.tf#L130) | Network configurations to use for Composer. | object({…}) | | {…} | | +| [outputs_location](variables.tf#L156) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string | | null | | +| [project_services](variables.tf#L168) | List of core services enabled on all projects. | list(string) | | […] | | +| [region](variables.tf#L179) | Region used for regional resources. | string | | "europe-west1" | | +| [service_encryption_keys](variables.tf#L185) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | | +| [subnet_self_links](variables.tf#L197) | Shared VPC subnet self links. | object({…}) | | null | 02-networking | +| [vpc_self_links](variables.tf#L206) | Shared VPC self links. 
| object({…}) | | null | 02-networking | ## Outputs diff --git a/fast/stages/03-data-platform/dev/main.tf b/fast/stages/03-data-platform/dev/main.tf index 24abb58d..53d901d1 100644 --- a/fast/stages/03-data-platform/dev/main.tf +++ b/fast/stages/03-data-platform/dev/main.tf @@ -37,7 +37,6 @@ module "data-platform" { composer_ip_ranges = { cloudsql = var.network_config_composer.cloudsql_range gke_master = var.network_config_composer.gke_master_range - web_server = var.network_config_composer.web_server_range } composer_secondary_ranges = { pods = var.network_config_composer.gke_pods_name diff --git a/fast/stages/03-data-platform/dev/variables.tf b/fast/stages/03-data-platform/dev/variables.tf index 9495316a..29dd1e45 100644 --- a/fast/stages/03-data-platform/dev/variables.tf +++ b/fast/stages/03-data-platform/dev/variables.tf @@ -34,14 +34,47 @@ variable "billing_account" { variable "composer_config" { description = "Cloud Composer configuration options." type = object({ - node_count = number - airflow_version = string - env_variables = map(string) + disable_deployment = optional(bool) + environment_size = string + software_config = object({ + airflow_config_overrides = optional(any) + pypi_packages = optional(any) + env_variables = optional(map(string)) + image_version = string + }) + workloads_config = object({ + scheduler = object( + { + cpu = number + memory_gb = number + storage_gb = number + count = number + } + ) + web_server = object( + { + cpu = number + memory_gb = number + storage_gb = number + } + ) + worker = object( + { + cpu = number + memory_gb = number + storage_gb = number + min_count = number + max_count = number + } + ) + }) }) default = { - node_count = 3 - airflow_version = "composer-1.17.5-airflow-2.1.4" - env_variables = {} + environment_size = "ENVIRONMENT_SIZE_SMALL" + software_config = { + image_version = "composer-2-airflow-2" + } + workloads_config = null } } @@ -101,14 +134,12 @@ variable "network_config_composer" { gke_master_range = 
string gke_pods_name = string gke_services_name = string - web_server_range = string }) default = { cloudsql_range = "192.168.254.0/24" gke_master_range = "192.168.255.0/28" gke_pods_name = "pods" gke_services_name = "services" - web_server_range = "192.168.255.16/28" } } diff --git a/modules/gke-cluster/README.md b/modules/gke-cluster/README.md index fc2e3b37..caf1fec9 100644 --- a/modules/gke-cluster/README.md +++ b/modules/gke-cluster/README.md @@ -79,7 +79,7 @@ module "cluster-1" { | [location](variables.tf#L117) | Cluster zone or region. | string | ✓ | | | [name](variables.tf#L174) | Cluster name. | string | ✓ | | | [project_id](variables.tf#L200) | Cluster project id. | string | ✓ | | -| [vpc_config](variables.tf#L211) | VPC-level configuration. | object({…}) | ✓ | | +| [vpc_config](variables.tf#L217) | VPC-level configuration. | object({…}) | ✓ | | | [cluster_autoscaling](variables.tf#L17) | Enable and configure limits for Node Auto-Provisioning with Cluster Autoscaler. | object({…}) | | null | | [description](variables.tf#L38) | Cluster description. | string | | null | | [enable_addons](variables.tf#L44) | Addons enabled in the cluster (true means enabled). | object({…}) | | {…} | @@ -94,6 +94,7 @@ module "cluster-1" { | [node_locations](variables.tf#L179) | Zones in which the cluster's nodes are located. | list(string) | | [] | | [private_cluster_config](variables.tf#L186) | Private cluster configuration. | object({…}) | | null | | [release_channel](variables.tf#L205) | Release channel for GKE upgrades. | string | | null | +| [tags](variables.tf#L211) | Network tags applied to nodes. 
| list(string) | | null | ## Outputs diff --git a/modules/gke-cluster/main.tf b/modules/gke-cluster/main.tf index 5b5cd95f..d529cb42 100644 --- a/modules/gke-cluster/main.tf +++ b/modules/gke-cluster/main.tf @@ -63,6 +63,7 @@ resource "google_container_cluster" "cluster" { enable_integrity_monitoring = true } } + tags = var.tags } diff --git a/modules/gke-cluster/variables.tf b/modules/gke-cluster/variables.tf index 62d871e9..72f7fc14 100644 --- a/modules/gke-cluster/variables.tf +++ b/modules/gke-cluster/variables.tf @@ -208,6 +208,12 @@ variable "release_channel" { default = null } +variable "tags" { + description = "Network tags applied to nodes." + type = list(string) + default = null +} + variable "vpc_config" { description = "VPC-level configuration." type = object({ diff --git a/tests/blueprints/data_solutions/data_platform_foundations/test_plan.py b/tests/blueprints/data_solutions/data_platform_foundations/test_plan.py index 1b51472c..17563647 100644 --- a/tests/blueprints/data_solutions/data_platform_foundations/test_plan.py +++ b/tests/blueprints/data_solutions/data_platform_foundations/test_plan.py @@ -21,5 +21,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture') def test_resources(e2e_plan_runner): "Test that plan works and the numbers of resources is as expected." 
modules, resources = e2e_plan_runner(FIXTURES_DIR) - assert len(modules) == 41 - assert len(resources) == 315 + assert len(modules) == 38 + assert len(resources) == 285 diff --git a/tests/modules/gke_cluster/fixture/main.tf b/tests/modules/gke_cluster/fixture/main.tf index 4ac38e16..5e11fbd7 100644 --- a/tests/modules/gke_cluster/fixture/main.tf +++ b/tests/modules/gke_cluster/fixture/main.tf @@ -25,4 +25,5 @@ module "test" { } enable_addons = var.enable_addons enable_features = var.enable_features + tags = var.tags } diff --git a/tests/modules/gke_cluster/fixture/variables.tf b/tests/modules/gke_cluster/fixture/variables.tf index 97fc6a63..2104e452 100644 --- a/tests/modules/gke_cluster/fixture/variables.tf +++ b/tests/modules/gke_cluster/fixture/variables.tf @@ -35,3 +35,9 @@ variable "monitoring_config" { managed_prometheus = true } } + +variable "tags" { + description = "Network tags applied to nodes." + type = list(string) + default = null +} diff --git a/tools/check_documentation.py b/tools/check_documentation.py index 30e76571..47643493 100755 --- a/tools/check_documentation.py +++ b/tools/check_documentation.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -128,14 +128,14 @@ def _check_dir(dir_name, exclude_files=None, files=False, show_extra=False): elif nc := [v.name for v in newvars if not v.description.endswith('.')]: state = state.FAIL_VARIABLE_PERIOD diff = "\n".join([ - f'----- {mod_name} variables missing colons -----', + f'----- {mod_name} variable descriptions missing ending period -----', ', '.join(nc), ]) elif nc := [o.name for o in newouts if not o.description.endswith('.')]: state = state.FAIL_VARIABLE_PERIOD diff = "\n".join([ - f'----- {mod_name} outputs missing colons -----', + f'----- {mod_name} output descriptions missing ending period -----', ', '.join(nc), ])