diff --git a/blueprints/data-solutions/data-platform-foundations/03-composer.tf b/blueprints/data-solutions/data-platform-foundations/03-composer.tf index 2622ffa2..33a21408 100644 --- a/blueprints/data-solutions/data-platform-foundations/03-composer.tf +++ b/blueprints/data-solutions/data-platform-foundations/03-composer.tf @@ -14,6 +14,40 @@ # tfdoc:file:description Orchestration Cloud Composer definition. +locals { + env_variables = { + BQ_LOCATION = var.location + DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}") + DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "") + DRP_PRJ = module.drop-project.project_id + DRP_BQ = module.drop-bq-0.dataset_id + DRP_GCS = module.drop-cs-0.url + DRP_PS = module.drop-ps-0.id + DWH_LAND_PRJ = module.dwh-lnd-project.project_id + DWH_LAND_BQ_DATASET = module.dwh-lnd-bq-0.dataset_id + DWH_LAND_GCS = module.dwh-lnd-cs-0.url + DWH_CURATED_PRJ = module.dwh-cur-project.project_id + DWH_CURATED_BQ_DATASET = module.dwh-cur-bq-0.dataset_id + DWH_CURATED_GCS = module.dwh-cur-cs-0.url + DWH_CONFIDENTIAL_PRJ = module.dwh-conf-project.project_id + DWH_CONFIDENTIAL_BQ_DATASET = module.dwh-conf-bq-0.dataset_id + DWH_CONFIDENTIAL_GCS = module.dwh-conf-cs-0.url + GCP_REGION = var.region + LOD_PRJ = module.load-project.project_id + LOD_GCS_STAGING = module.load-cs-df-0.url + LOD_NET_VPC = local.load_vpc + LOD_NET_SUBNET = local.load_subnet + LOD_SA_DF = module.load-sa-df-0.email + ORC_PRJ = module.orch-project.project_id + ORC_GCS = module.orch-cs-0.url + TRF_PRJ = module.transf-project.project_id + TRF_GCS_STAGING = module.transf-cs-df-0.url + TRF_NET_VPC = local.transf_vpc + TRF_NET_SUBNET = local.transf_subnet + TRF_SA_DF = module.transf-sa-df-0.email + TRF_SA_BQ = module.transf-sa-bq-0.email + } +} module "orch-sa-cmp-0" { source = "../../../modules/iam-service-account" project_id = module.orch-project.project_id @@ -27,21 +61,51 @@ module "orch-sa-cmp-0" { } resource "google_composer_environment" "orch-cmp-0" { - 
provider = google-beta - project = module.orch-project.project_id - name = "${var.prefix}-orc-cmp-0" - region = var.region + count = var.composer_config.disable_deployment == true ? 0 : 1 + project = module.orch-project.project_id + name = "${var.prefix}-orc-cmp-0" + region = var.region config { - node_count = var.composer_config.node_count + software_config { + airflow_config_overrides = try(var.composer_config.software_config.airflow_config_overrides, null) + pypi_packages = try(var.composer_config.software_config.pypi_packages, null) + env_variables = merge(try(var.composer_config.software_config.env_variables, null), local.env_variables) + image_version = try(var.composer_config.software_config.image_version, null) + } + dynamic "workloads_config" { + for_each = (try(var.composer_config.workloads_config, null) != null ? { 1 = 1 } : {}) + + content { + scheduler { + cpu = try(var.composer_config.workloads_config.scheduler.cpu, null) + memory_gb = try(var.composer_config.workloads_config.scheduler.memory_gb, null) + storage_gb = try(var.composer_config.workloads_config.scheduler.storage_gb, null) + count = try(var.composer_config.workloads_config.scheduler.count, null) + } + web_server { + cpu = try(var.composer_config.workloads_config.web_server.cpu, null) + memory_gb = try(var.composer_config.workloads_config.web_server.memory_gb, null) + storage_gb = try(var.composer_config.workloads_config.web_server.storage_gb, null) + } + worker { + cpu = try(var.composer_config.workloads_config.worker.cpu, null) + memory_gb = try(var.composer_config.workloads_config.worker.memory_gb, null) + storage_gb = try(var.composer_config.workloads_config.worker.storage_gb, null) + min_count = try(var.composer_config.workloads_config.worker.min_count, null) + max_count = try(var.composer_config.workloads_config.worker.max_count, null) + } + } + } + + environment_size = var.composer_config.environment_size + node_config { - zone = "${var.region}-b" - service_account = 
module.orch-sa-cmp-0.email network = local.orch_vpc subnetwork = local.orch_subnet - tags = ["composer-worker", "http-server", "https-server"] - enable_ip_masq_agent = true + service_account = module.orch-sa-cmp-0.email + enable_ip_masq_agent = "true" + tags = ["composer-worker"] ip_allocation_policy { - use_ip_aliases = "true" cluster_secondary_range_name = try( var.network_config.composer_secondary_ranges.pods, "pods" ) @@ -58,80 +122,20 @@ resource "google_composer_environment" "orch-cmp-0" { master_ipv4_cidr_block = try( var.network_config.composer_ip_ranges.gke_master, "10.20.11.0/28" ) - web_server_ipv4_cidr_block = try( - var.network_config.composer_ip_ranges.web_server, "10.20.11.16/28" - ) } - software_config { - image_version = var.composer_config.airflow_version - env_variables = merge( - var.composer_config.env_variables, { - BQ_LOCATION = var.location - DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}") - DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "") - DRP_PRJ = module.drop-project.project_id - DRP_BQ = module.drop-bq-0.dataset_id - DRP_GCS = module.drop-cs-0.url - DRP_PS = module.drop-ps-0.id - DWH_LAND_PRJ = module.dwh-lnd-project.project_id - DWH_LAND_BQ_DATASET = module.dwh-lnd-bq-0.dataset_id - DWH_LAND_GCS = module.dwh-lnd-cs-0.url - DWH_CURATED_PRJ = module.dwh-cur-project.project_id - DWH_CURATED_BQ_DATASET = module.dwh-cur-bq-0.dataset_id - DWH_CURATED_GCS = module.dwh-cur-cs-0.url - DWH_CONFIDENTIAL_PRJ = module.dwh-conf-project.project_id - DWH_CONFIDENTIAL_BQ_DATASET = module.dwh-conf-bq-0.dataset_id - DWH_CONFIDENTIAL_GCS = module.dwh-conf-cs-0.url - DWH_PLG_PRJ = module.dwh-plg-project.project_id - DWH_PLG_BQ_DATASET = module.dwh-plg-bq-0.dataset_id - DWH_PLG_GCS = module.dwh-plg-cs-0.url - GCP_REGION = var.region - LOD_PRJ = module.load-project.project_id - LOD_GCS_STAGING = module.load-cs-df-0.url - LOD_NET_VPC = local.load_vpc - LOD_NET_SUBNET = local.load_subnet - LOD_SA_DF = module.load-sa-df-0.email - 
ORC_PRJ = module.orch-project.project_id - ORC_GCS = module.orch-cs-0.url - TRF_PRJ = module.transf-project.project_id - TRF_GCS_STAGING = module.transf-cs-df-0.url - TRF_NET_VPC = local.transf_vpc - TRF_NET_SUBNET = local.transf_subnet - TRF_SA_DF = module.transf-sa-df-0.email - TRF_SA_BQ = module.transf-sa-bq-0.email - } - ) - } - dynamic "encryption_config" { for_each = ( - try(local.service_encryption_keys.composer != null, false) + try(var.service_encryption_keys[var.region], null) != null ? { 1 = 1 } : {} ) content { - kms_key_name = try(local.service_encryption_keys.composer, null) + kms_key_name = try(var.service_encryption_keys[var.region], null) } } - - # dynamic "web_server_network_access_control" { - # for_each = toset( - # var.network_config.web_server_network_access_control == null - # ? [] - # : [var.network_config.web_server_network_access_control] - # ) - # content { - # dynamic "allowed_ip_range" { - # for_each = toset(web_server_network_access_control.key) - # content { - # value = allowed_ip_range.key - # } - # } - # } - # } - } depends_on = [ google_project_iam_member.shared_vpc, + module.orch-project ] } diff --git a/blueprints/data-solutions/data-platform-foundations/03-orchestration.tf b/blueprints/data-solutions/data-platform-foundations/03-orchestration.tf index 2974c122..8e2d0725 100644 --- a/blueprints/data-solutions/data-platform-foundations/03-orchestration.tf +++ b/blueprints/data-solutions/data-platform-foundations/03-orchestration.tf @@ -54,6 +54,9 @@ module "orch-project" { "roles/bigquery.jobUser" = [ module.orch-sa-cmp-0.iam_email, ] + "roles/composer.ServiceAgentV2Ext" = [ + "serviceAccount:${module.orch-project.service_accounts.robots.composer}" + ] "roles/composer.worker" = [ module.orch-sa-cmp-0.iam_email ] @@ -67,11 +70,6 @@ module "orch-project" { "roles/storage.objectViewer" = [module.load-sa-df-0.iam_email] } oslogin = false - org_policies = { - "constraints/compute.requireOsLogin" = { - enforce = false - } - } services = 
concat(var.project_services, [ "artifactregistry.googleapis.com", "bigquery.googleapis.com", diff --git a/blueprints/data-solutions/data-platform-foundations/05-datawarehouse.tf b/blueprints/data-solutions/data-platform-foundations/05-datawarehouse.tf index 879a0e0b..0db5ce44 100644 --- a/blueprints/data-solutions/data-platform-foundations/05-datawarehouse.tf +++ b/blueprints/data-solutions/data-platform-foundations/05-datawarehouse.tf @@ -30,21 +30,6 @@ locals { "roles/storage.objectViewer", ] } - dwh_plg_group_iam = { - (local.groups.data-engineers) = [ - "roles/bigquery.dataEditor", - "roles/storage.admin", - ], - (local.groups.data-analysts) = [ - "roles/bigquery.dataEditor", - "roles/bigquery.jobUser", - "roles/bigquery.metadataViewer", - "roles/bigquery.user", - "roles/datacatalog.viewer", - "roles/datacatalog.tagTemplateViewer", - "roles/storage.objectAdmin", - ] - } dwh_lnd_iam = { "roles/bigquery.dataOwner" = [ module.load-sa-df-0.iam_email, @@ -140,21 +125,6 @@ module "dwh-conf-project" { } } -module "dwh-plg-project" { - source = "../../../modules/project" - parent = var.folder_id - billing_account = var.billing_account_id - prefix = var.prefix - name = "dwh-plg${local.project_suffix}" - group_iam = local.dwh_plg_group_iam - iam = {} - services = local.dwh_services - service_encryption_key_ids = { - bq = [try(local.service_encryption_keys.bq, null)] - storage = [try(local.service_encryption_keys.storage, null)] - } -} - # Bigquery module "dwh-lnd-bq-0" { @@ -181,14 +151,6 @@ module "dwh-conf-bq-0" { encryption_key = try(local.service_encryption_keys.bq, null) } -module "dwh-plg-bq-0" { - source = "../../../modules/bigquery-dataset" - project_id = module.dwh-plg-project.project_id - id = "${replace(var.prefix, "-", "_")}_dwh_plg_bq_0" - location = var.location - encryption_key = try(local.service_encryption_keys.bq, null) -} - # Cloud storage module "dwh-lnd-cs-0" { @@ -223,14 +185,3 @@ module "dwh-conf-cs-0" { encryption_key = 
try(local.service_encryption_keys.storage, null) force_destroy = var.data_force_destroy } - -module "dwh-plg-cs-0" { - source = "../../../modules/gcs" - project_id = module.dwh-plg-project.project_id - prefix = var.prefix - name = "dwh-plg-cs-0" - location = var.location - storage_class = "MULTI_REGIONAL" - encryption_key = try(local.service_encryption_keys.storage, null) - force_destroy = var.data_force_destroy -} diff --git a/blueprints/data-solutions/data-platform-foundations/IAM.md b/blueprints/data-solutions/data-platform-foundations/IAM.md index 54d35939..5a1995da 100644 --- a/blueprints/data-solutions/data-platform-foundations/IAM.md +++ b/blueprints/data-solutions/data-platform-foundations/IAM.md @@ -57,14 +57,6 @@ Legend: + additive, conditional. |trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | |trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) | -## Project dwh-plg - -| members | roles | -|---|---| -|gcp-data-analysts
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | -|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| - ## Project lod | members | roles | @@ -80,7 +72,7 @@ Legend: + additive, conditional. | members | roles | |---|---| |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor)
[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)
[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | -|SERVICE_IDENTITY_cloudcomposer-accounts
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_cloudcomposer-accounts
serviceAccount|[roles/composer.ServiceAgentV2Ext](https://cloud.google.com/iam/docs/understanding-roles#composer.ServiceAgentV2Ext)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |load-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |orc-cmp-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | diff --git a/blueprints/data-solutions/data-platform-foundations/README.md b/blueprints/data-solutions/data-platform-foundations/README.md index 8da143b2..ed28eba6 100644 --- a/blueprints/data-solutions/data-platform-foundations/README.md +++ b/blueprints/data-solutions/data-platform-foundations/README.md @@ -21,7 +21,7 @@ The approach adapts to different high-level requirements: - least privilege principle - rely on service account impersonation -The code in this blueprint doesn't address Organization-level configurations (Organization policy, VPC-SC, centralized logs). We expect those elements to be managed by automation stages external to this script like those in [FAST](../../../fast). +The code in this blueprint doesn't address Organization-level configurations (Organization policy, VPC-SC, centralized logs). We expect those elements to be managed by automation stages external to this script like those in [FAST](../../../fast) and this blueprint deployed on top of them as one of the [stages](../../../fast/stages/03-data-platform/dev/README.md). ### Project structure @@ -39,14 +39,13 @@ This separation into projects allows adhering to the least-privilege principle b The script will create the following projects: - **Drop off** Used to store temporary data. Data is pushed to Cloud Storage, BigQuery, or Cloud PubSub. Resources are configured with a customizable lifecycle policy. -- **Load** Used to load data from the drop off zone to the data warehouse. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended. 
+- **Load** Used to load data from the drop off zone to the data warehouse. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended. When you need to handle workloads from different teams, if strong role separation is needed between them, we suggest customizing the script and having separate `Load` projects. - **Data Warehouse** Several projects distributed across 3 separate layers, to host progressively processed and refined data: - **Landing - Raw data** Structured Data, stored in relevant formats: structured data stored in BigQuery, unstructured data stored on Cloud Storage with additional metadata stored in BigQuery (for example pictures stored in Cloud Storage and analysis of the images for Cloud Vision API stored in BigQuery). - **Curated - Cleansed, aggregated and curated data** - **Confidential - Curated and unencrypted layer** - - **Playground** Temporary tables that Data Analyst may use to perform R&D on data available in other Data Warehouse layers. - **Orchestration** Used to host Cloud Composer, which orchestrates all tasks that move data across layers. -- **Transformation** Used to move data between Data Warehouse layers. We strongly suggest relying on BigQuery Engine to perform the transformations. If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII. +- **Transformation** Used to move data between Data Warehouse layers. We strongly suggest relying on BigQuery Engine to perform the transformations. 
If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII. When you need to handle workloads from different teams, if strong role separation is needed between them, we suggest customizing the script and having separate `Transformation` projects. - **Exposure** Used to host resources that share processed data with external systems. Depending on the access pattern, data can be presented via Cloud SQL, BigQuery, or Bigtable. For BigQuery data, we strongly suggest relying on [Authorized views](https://cloud.google.com/bigquery/docs/authorized-views). ### Roles @@ -80,10 +79,10 @@ We use three groups to control access to resources: The table below shows a high level overview of roles for each group on each project, using `READ`, `WRITE` and `ADMIN` access patterns for simplicity. For detailed roles please refer to the code. -|Group|Drop off|Load|Transformation|DHW Landing|DWH Curated|DWH Confidential|DWH Playground|Orchestration|Common| +|Group|Drop off|Load|Transformation|DHW Landing|DWH Curated|DWH Confidential|Orchestration|Common| |-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| -|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`| -|Data Analysts|-|-|-|-|-|`READ`|`READ`/`WRITE`|-|-| +|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`| +|Data Analysts|-|-|-|-|-|`READ`|-|-| |Data Security|-|-|-|-|-|-|-|-|`ADMIN`| You can configure groups via the `groups` variable. @@ -109,14 +108,13 @@ In both VPC scenarios, you also need these ranges for Composer: - one /24 for Cloud SQL - one /28 for the GKE control plane -- one /28 for the web server ### Resource naming conventions Resources follow the naming convention described below. 
- `prefix-layer` for projects -- `prefix-layer-prduct` for resources +- `prefix-layer-product` for resources - `prefix-layer[2]-gcp-product[2]-counter` for services and service accounts ### Encryption @@ -221,7 +219,7 @@ module "data-platform" { prefix = "myprefix" } -# tftest modules=42 resources=316 +# tftest modules=39 resources=286 ``` ## Customizations @@ -247,31 +245,31 @@ You can find examples in the `[demo](./demo)` folder. | name | description | type | required | default | |---|---|:---:|:---:|:---:| | [billing_account_id](variables.tf#L17) | Billing account id. | string | ✓ | | -| [folder_id](variables.tf#L53) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ | | -| [organization_domain](variables.tf#L98) | Organization domain. | string | ✓ | | -| [prefix](variables.tf#L103) | Prefix used for resource names. | string | ✓ | | -| [composer_config](variables.tf#L22) | Cloud Composer config. | object({…}) | | {…} | -| [data_catalog_tags](variables.tf#L36) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) | | {…} | -| [data_force_destroy](variables.tf#L47) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | -| [groups](variables.tf#L58) | User groups. | map(string) | | {…} | -| [location](variables.tf#L68) | Location used for multi-regional resources. | string | | "eu" | -| [network_config](variables.tf#L74) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | -| [project_services](variables.tf#L112) | List of core services enabled on all projects. | list(string) | | […] | -| [project_suffix](variables.tf#L123) | Suffix used only for project ids. | string | | null | -| [region](variables.tf#L129) | Region used for regional resources. 
| string | | "europe-west1" | -| [service_encryption_keys](variables.tf#L135) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | +| [folder_id](variables.tf#L122) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ | | +| [organization_domain](variables.tf#L166) | Organization domain. | string | ✓ | | +| [prefix](variables.tf#L171) | Prefix used for resource names. | string | ✓ | | +| [composer_config](variables.tf#L22) | Cloud Composer config. | object({…}) | | {…} | +| [data_catalog_tags](variables.tf#L105) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) | | {…} | +| [data_force_destroy](variables.tf#L116) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | +| [groups](variables.tf#L127) | User groups. | map(string) | | {…} | +| [location](variables.tf#L137) | Location used for multi-regional resources. | string | | "eu" | +| [network_config](variables.tf#L143) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | +| [project_services](variables.tf#L180) | List of core services enabled on all projects. | list(string) | | […] | +| [project_suffix](variables.tf#L191) | Suffix used only for project ids. | string | | null | +| [region](variables.tf#L197) | Region used for regional resources. | string | | "europe-west1" | +| [service_encryption_keys](variables.tf#L203) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | ## Outputs | name | description | sensitive | |---|---|:---:| | [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | | -| [demo_commands](outputs.tf#L28) | Demo commands. | | -| [gcs-buckets](outputs.tf#L41) | GCS buckets. 
| | -| [kms_keys](outputs.tf#L55) | Cloud MKS keys. | | -| [projects](outputs.tf#L60) | GCP Projects informations. | | -| [vpc_network](outputs.tf#L88) | VPC network. | | -| [vpc_subnet](outputs.tf#L97) | VPC subnetworks. | | +| [demo_commands](outputs.tf#L27) | Demo commands. Relevant only if Composer is deployed. | | +| [gcs-buckets](outputs.tf#L40) | GCS buckets. | | +| [kms_keys](outputs.tf#L53) | Cloud MKS keys. | | +| [projects](outputs.tf#L58) | GCP Projects informations. | | +| [vpc_network](outputs.tf#L84) | VPC network. | | +| [vpc_subnet](outputs.tf#L93) | VPC subnetworks. | | ## TODOs diff --git a/blueprints/data-solutions/data-platform-foundations/images/overview_diagram.png b/blueprints/data-solutions/data-platform-foundations/images/overview_diagram.png index 642c81c2..073ec870 100644 Binary files a/blueprints/data-solutions/data-platform-foundations/images/overview_diagram.png and b/blueprints/data-solutions/data-platform-foundations/images/overview_diagram.png differ diff --git a/blueprints/data-solutions/data-platform-foundations/outputs.tf b/blueprints/data-solutions/data-platform-foundations/outputs.tf index b941776c..2394fe09 100644 --- a/blueprints/data-solutions/data-platform-foundations/outputs.tf +++ b/blueprints/data-solutions/data-platform-foundations/outputs.tf @@ -21,17 +21,16 @@ output "bigquery-datasets" { dwh-landing-bq-0 = module.dwh-lnd-bq-0.dataset_id, dwh-curated-bq-0 = module.dwh-cur-bq-0.dataset_id, dwh-confidential-bq-0 = module.dwh-conf-bq-0.dataset_id, - dwh-plg-bq-0 = module.dwh-plg-bq-0.dataset_id, } } output "demo_commands" { - description = "Demo commands." + description = "Demo commands. Relevant only if Composer is deployed." 
value = { 01 = "gsutil -i ${module.drop-sa-cs-0.email} cp demo/data/*.csv gs://${module.drop-cs-0.name}" - 02 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/data/*.j* gs://${module.orch-cs-0.name}" - 03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/" - 04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG." + 02 = try("gsutil -i ${module.orch-sa-cmp-0.email} cp demo/data/*.j* gs://${module.orch-cs-0.name}", "Composer not deployed.") + 03 = try("gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0[0].config[0].dag_gcs_prefix}/", "Composer not deployed") + 04 = try("Open ${google_composer_environment.orch-cmp-0[0].config.0.airflow_uri} and run uploaded DAG.", "Composer not deployed") 05 = <object({…}) | ✓ | | 00-bootstrap | | [billing_account](variables.tf#L25) | Billing account id and organization id ('nnnnnnnn' or null). | object({…}) | ✓ | | 00-globals | -| [folder_ids](variables.tf#L65) | Folder to be used for the networking resources in folders/nnnn format. | object({…}) | ✓ | | 01-resman | -| [host_project_ids](variables.tf#L83) | Shared VPC project ids. | object({…}) | ✓ | | 02-networking | -| [organization](variables.tf#L115) | Organization details. | object({…}) | ✓ | | 00-globals | -| [prefix](variables.tf#L131) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | string | ✓ | | 00-globals | -| [composer_config](variables.tf#L34) | Cloud Composer configuration options. | object({…}) | | {…} | | -| [data_catalog_tags](variables.tf#L48) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) | | {…} | | -| [data_force_destroy](variables.tf#L59) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. 
| bool | | false | | -| [groups](variables.tf#L73) | Groups. | map(string) | | {…} | | -| [location](variables.tf#L91) | Location used for multi-regional resources. | string | | "eu" | | -| [network_config_composer](variables.tf#L97) | Network configurations to use for Composer. | object({…}) | | {…} | | -| [outputs_location](variables.tf#L125) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string | | null | | -| [project_services](variables.tf#L137) | List of core services enabled on all projects. | list(string) | | […] | | -| [region](variables.tf#L148) | Region used for regional resources. | string | | "europe-west1" | | -| [service_encryption_keys](variables.tf#L154) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | | -| [subnet_self_links](variables.tf#L166) | Shared VPC subnet self links. | object({…}) | | null | 02-networking | -| [vpc_self_links](variables.tf#L175) | Shared VPC self links. | object({…}) | | null | 02-networking | +| [folder_ids](variables.tf#L98) | Folder to be used for the networking resources in folders/nnnn format. | object({…}) | ✓ | | 01-resman | +| [host_project_ids](variables.tf#L116) | Shared VPC project ids. | object({…}) | ✓ | | 02-networking | +| [organization](variables.tf#L146) | Organization details. | object({…}) | ✓ | | 00-globals | +| [prefix](variables.tf#L162) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | string | ✓ | | 00-globals | +| [composer_config](variables.tf#L34) | Cloud Composer configuration options. | object({…}) | | {…} | | +| [data_catalog_tags](variables.tf#L81) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. 
| map(map(list(string))) | | {…} | | +| [data_force_destroy](variables.tf#L92) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | bool | | false | | +| [groups](variables.tf#L106) | Groups. | map(string) | | {…} | | +| [location](variables.tf#L124) | Location used for multi-regional resources. | string | | "eu" | | +| [network_config_composer](variables.tf#L130) | Network configurations to use for Composer. | object({…}) | | {…} | | +| [outputs_location](variables.tf#L156) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string | | null | | +| [project_services](variables.tf#L168) | List of core services enabled on all projects. | list(string) | | […] | | +| [region](variables.tf#L179) | Region used for regional resources. | string | | "europe-west1" | | +| [service_encryption_keys](variables.tf#L185) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | | +| [subnet_self_links](variables.tf#L197) | Shared VPC subnet self links. | object({…}) | | null | 02-networking | +| [vpc_self_links](variables.tf#L206) | Shared VPC self links. 
| object({…}) | | null | 02-networking | ## Outputs diff --git a/fast/stages/03-data-platform/dev/main.tf b/fast/stages/03-data-platform/dev/main.tf index 24abb58d..53d901d1 100644 --- a/fast/stages/03-data-platform/dev/main.tf +++ b/fast/stages/03-data-platform/dev/main.tf @@ -37,7 +37,6 @@ module "data-platform" { composer_ip_ranges = { cloudsql = var.network_config_composer.cloudsql_range gke_master = var.network_config_composer.gke_master_range - web_server = var.network_config_composer.web_server_range } composer_secondary_ranges = { pods = var.network_config_composer.gke_pods_name diff --git a/fast/stages/03-data-platform/dev/variables.tf b/fast/stages/03-data-platform/dev/variables.tf index 9495316a..29dd1e45 100644 --- a/fast/stages/03-data-platform/dev/variables.tf +++ b/fast/stages/03-data-platform/dev/variables.tf @@ -34,14 +34,47 @@ variable "billing_account" { variable "composer_config" { description = "Cloud Composer configuration options." type = object({ - node_count = number - airflow_version = string - env_variables = map(string) + disable_deployment = optional(bool) + environment_size = string + software_config = object({ + airflow_config_overrides = optional(any) + pypi_packages = optional(any) + env_variables = optional(map(string)) + image_version = string + }) + workloads_config = object({ + scheduler = object( + { + cpu = number + memory_gb = number + storage_gb = number + count = number + } + ) + web_server = object( + { + cpu = number + memory_gb = number + storage_gb = number + } + ) + worker = object( + { + cpu = number + memory_gb = number + storage_gb = number + min_count = number + max_count = number + } + ) + }) }) default = { - node_count = 3 - airflow_version = "composer-1.17.5-airflow-2.1.4" - env_variables = {} + environment_size = "ENVIRONMENT_SIZE_SMALL" + software_config = { + image_version = "composer-2-airflow-2" + } + workloads_config = null } } @@ -101,14 +134,12 @@ variable "network_config_composer" { gke_master_range = 
string gke_pods_name = string gke_services_name = string - web_server_range = string }) default = { cloudsql_range = "192.168.254.0/24" gke_master_range = "192.168.255.0/28" gke_pods_name = "pods" gke_services_name = "services" - web_server_range = "192.168.255.16/28" } } diff --git a/tests/blueprints/data_solutions/data_platform_foundations/test_plan.py b/tests/blueprints/data_solutions/data_platform_foundations/test_plan.py index 1b51472c..17563647 100644 --- a/tests/blueprints/data_solutions/data_platform_foundations/test_plan.py +++ b/tests/blueprints/data_solutions/data_platform_foundations/test_plan.py @@ -21,5 +21,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture') def test_resources(e2e_plan_runner): "Test that plan works and the numbers of resources is as expected." modules, resources = e2e_plan_runner(FIXTURES_DIR) - assert len(modules) == 41 - assert len(resources) == 315 + assert len(modules) == 38 + assert len(resources) == 285