Merge pull request #1089 from GoogleCloudPlatform/lcaggio/dp-20230111
Update Data Platform
This commit is contained in:
commit
6e5922eca8
|
@ -14,6 +14,40 @@
|
|||
|
||||
# tfdoc:file:description Orchestration Cloud Composer definition.
|
||||
|
||||
# Environment variables exported to the Cloud Composer environment.
# This map is passed as the last argument to merge() in the environment's
# software_config.env_variables, after var.composer_config.software_config.env_variables;
# merge() gives precedence to later arguments, so an entry defined here wins over a
# user-supplied variable with the same name.
locals {
|
||||
env_variables = {
|
||||
BQ_LOCATION = var.location
|
||||
DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}")
|
||||
DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "")
|
||||
DRP_PRJ = module.drop-project.project_id
|
||||
DRP_BQ = module.drop-bq-0.dataset_id
|
||||
DRP_GCS = module.drop-cs-0.url
|
||||
DRP_PS = module.drop-ps-0.id
|
||||
DWH_LAND_PRJ = module.dwh-lnd-project.project_id
|
||||
DWH_LAND_BQ_DATASET = module.dwh-lnd-bq-0.dataset_id
|
||||
DWH_LAND_GCS = module.dwh-lnd-cs-0.url
|
||||
DWH_CURATED_PRJ = module.dwh-cur-project.project_id
|
||||
DWH_CURATED_BQ_DATASET = module.dwh-cur-bq-0.dataset_id
|
||||
DWH_CURATED_GCS = module.dwh-cur-cs-0.url
|
||||
DWH_CONFIDENTIAL_PRJ = module.dwh-conf-project.project_id
|
||||
DWH_CONFIDENTIAL_BQ_DATASET = module.dwh-conf-bq-0.dataset_id
|
||||
DWH_CONFIDENTIAL_GCS = module.dwh-conf-cs-0.url
|
||||
GCP_REGION = var.region
|
||||
LOD_PRJ = module.load-project.project_id
|
||||
LOD_GCS_STAGING = module.load-cs-df-0.url
|
||||
LOD_NET_VPC = local.load_vpc
|
||||
LOD_NET_SUBNET = local.load_subnet
|
||||
LOD_SA_DF = module.load-sa-df-0.email
|
||||
ORC_PRJ = module.orch-project.project_id
|
||||
ORC_GCS = module.orch-cs-0.url
|
||||
TRF_PRJ = module.transf-project.project_id
|
||||
TRF_GCS_STAGING = module.transf-cs-df-0.url
|
||||
TRF_NET_VPC = local.transf_vpc
|
||||
TRF_NET_SUBNET = local.transf_subnet
|
||||
TRF_SA_DF = module.transf-sa-df-0.email
|
||||
TRF_SA_BQ = module.transf-sa-bq-0.email
|
||||
}
|
||||
}
|
||||
module "orch-sa-cmp-0" {
|
||||
source = "../../../modules/iam-service-account"
|
||||
project_id = module.orch-project.project_id
|
||||
|
@ -27,21 +61,51 @@ module "orch-sa-cmp-0" {
|
|||
}
|
||||
|
||||
resource "google_composer_environment" "orch-cmp-0" {
|
||||
provider = google-beta
|
||||
project = module.orch-project.project_id
|
||||
name = "${var.prefix}-orc-cmp-0"
|
||||
region = var.region
|
||||
count = var.composer_config.disable_deployment == true ? 0 : 1
|
||||
project = module.orch-project.project_id
|
||||
name = "${var.prefix}-orc-cmp-0"
|
||||
region = var.region
|
||||
config {
|
||||
node_count = var.composer_config.node_count
|
||||
software_config {
|
||||
airflow_config_overrides = try(var.composer_config.software_config.airflow_config_overrides, null)
|
||||
pypi_packages = try(var.composer_config.software_config.pypi_packages, null)
|
||||
env_variables = merge(try(var.composer_config.software_config.env_variables, null), local.env_variables)
|
||||
image_version = try(var.composer_config.software_config.image_version, null)
|
||||
}
|
||||
dynamic "workloads_config" {
|
||||
for_each = (try(var.composer_config.workloads_config, null) != null ? { 1 = 1 } : {})
|
||||
|
||||
content {
|
||||
scheduler {
|
||||
cpu = try(var.composer_config.workloads_config.scheduler.cpu, null)
|
||||
memory_gb = try(var.composer_config.workloads_config.scheduler.memory_gb, null)
|
||||
storage_gb = try(var.composer_config.workloads_config.scheduler.storage_gb, null)
|
||||
count = try(var.composer_config.workloads_config.scheduler.count, null)
|
||||
}
|
||||
web_server {
|
||||
cpu = try(var.composer_config.workloads_config.web_server.cpu, null)
|
||||
memory_gb = try(var.composer_config.workloads_config.web_server.memory_gb, null)
|
||||
storage_gb = try(var.composer_config.workloads_config.web_server.storage_gb, null)
|
||||
}
|
||||
worker {
|
||||
cpu = try(var.composer_config.workloads_config.worker.cpu, null)
|
||||
memory_gb = try(var.composer_config.workloads_config.worker.memory_gb, null)
|
||||
storage_gb = try(var.composer_config.workloads_config.worker.storage_gb, null)
|
||||
min_count = try(var.composer_config.workloads_config.worker.min_count, null)
|
||||
max_count = try(var.composer_config.workloads_config.worker.max_count, null)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
environment_size = var.composer_config.environment_size
|
||||
|
||||
node_config {
|
||||
zone = "${var.region}-b"
|
||||
service_account = module.orch-sa-cmp-0.email
|
||||
network = local.orch_vpc
|
||||
subnetwork = local.orch_subnet
|
||||
tags = ["composer-worker", "http-server", "https-server"]
|
||||
enable_ip_masq_agent = true
|
||||
service_account = module.orch-sa-cmp-0.email
|
||||
enable_ip_masq_agent = "true"
|
||||
tags = ["composer-worker"]
|
||||
ip_allocation_policy {
|
||||
use_ip_aliases = "true"
|
||||
cluster_secondary_range_name = try(
|
||||
var.network_config.composer_secondary_ranges.pods, "pods"
|
||||
)
|
||||
|
@ -58,80 +122,20 @@ resource "google_composer_environment" "orch-cmp-0" {
|
|||
master_ipv4_cidr_block = try(
|
||||
var.network_config.composer_ip_ranges.gke_master, "10.20.11.0/28"
|
||||
)
|
||||
web_server_ipv4_cidr_block = try(
|
||||
var.network_config.composer_ip_ranges.web_server, "10.20.11.16/28"
|
||||
)
|
||||
}
|
||||
software_config {
|
||||
image_version = var.composer_config.airflow_version
|
||||
env_variables = merge(
|
||||
var.composer_config.env_variables, {
|
||||
BQ_LOCATION = var.location
|
||||
DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}")
|
||||
DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "")
|
||||
DRP_PRJ = module.drop-project.project_id
|
||||
DRP_BQ = module.drop-bq-0.dataset_id
|
||||
DRP_GCS = module.drop-cs-0.url
|
||||
DRP_PS = module.drop-ps-0.id
|
||||
DWH_LAND_PRJ = module.dwh-lnd-project.project_id
|
||||
DWH_LAND_BQ_DATASET = module.dwh-lnd-bq-0.dataset_id
|
||||
DWH_LAND_GCS = module.dwh-lnd-cs-0.url
|
||||
DWH_CURATED_PRJ = module.dwh-cur-project.project_id
|
||||
DWH_CURATED_BQ_DATASET = module.dwh-cur-bq-0.dataset_id
|
||||
DWH_CURATED_GCS = module.dwh-cur-cs-0.url
|
||||
DWH_CONFIDENTIAL_PRJ = module.dwh-conf-project.project_id
|
||||
DWH_CONFIDENTIAL_BQ_DATASET = module.dwh-conf-bq-0.dataset_id
|
||||
DWH_CONFIDENTIAL_GCS = module.dwh-conf-cs-0.url
|
||||
DWH_PLG_PRJ = module.dwh-plg-project.project_id
|
||||
DWH_PLG_BQ_DATASET = module.dwh-plg-bq-0.dataset_id
|
||||
DWH_PLG_GCS = module.dwh-plg-cs-0.url
|
||||
GCP_REGION = var.region
|
||||
LOD_PRJ = module.load-project.project_id
|
||||
LOD_GCS_STAGING = module.load-cs-df-0.url
|
||||
LOD_NET_VPC = local.load_vpc
|
||||
LOD_NET_SUBNET = local.load_subnet
|
||||
LOD_SA_DF = module.load-sa-df-0.email
|
||||
ORC_PRJ = module.orch-project.project_id
|
||||
ORC_GCS = module.orch-cs-0.url
|
||||
TRF_PRJ = module.transf-project.project_id
|
||||
TRF_GCS_STAGING = module.transf-cs-df-0.url
|
||||
TRF_NET_VPC = local.transf_vpc
|
||||
TRF_NET_SUBNET = local.transf_subnet
|
||||
TRF_SA_DF = module.transf-sa-df-0.email
|
||||
TRF_SA_BQ = module.transf-sa-bq-0.email
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
dynamic "encryption_config" {
|
||||
for_each = (
|
||||
try(local.service_encryption_keys.composer != null, false)
|
||||
try(var.service_encryption_keys[var.region], null) != null
|
||||
? { 1 = 1 }
|
||||
: {}
|
||||
)
|
||||
content {
|
||||
kms_key_name = try(local.service_encryption_keys.composer, null)
|
||||
kms_key_name = try(var.service_encryption_keys[var.region], null)
|
||||
}
|
||||
}
|
||||
|
||||
# dynamic "web_server_network_access_control" {
|
||||
# for_each = toset(
|
||||
# var.network_config.web_server_network_access_control == null
|
||||
# ? []
|
||||
# : [var.network_config.web_server_network_access_control]
|
||||
# )
|
||||
# content {
|
||||
# dynamic "allowed_ip_range" {
|
||||
# for_each = toset(web_server_network_access_control.key)
|
||||
# content {
|
||||
# value = allowed_ip_range.key
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
}
|
||||
depends_on = [
|
||||
google_project_iam_member.shared_vpc,
|
||||
module.orch-project
|
||||
]
|
||||
}
|
||||
|
|
|
@ -54,6 +54,9 @@ module "orch-project" {
|
|||
"roles/bigquery.jobUser" = [
|
||||
module.orch-sa-cmp-0.iam_email,
|
||||
]
|
||||
"roles/composer.ServiceAgentV2Ext" = [
|
||||
"serviceAccount:${module.orch-project.service_accounts.robots.composer}"
|
||||
]
|
||||
"roles/composer.worker" = [
|
||||
module.orch-sa-cmp-0.iam_email
|
||||
]
|
||||
|
@ -67,11 +70,6 @@ module "orch-project" {
|
|||
"roles/storage.objectViewer" = [module.load-sa-df-0.iam_email]
|
||||
}
|
||||
oslogin = false
|
||||
org_policies = {
|
||||
"constraints/compute.requireOsLogin" = {
|
||||
enforce = false
|
||||
}
|
||||
}
|
||||
services = concat(var.project_services, [
|
||||
"artifactregistry.googleapis.com",
|
||||
"bigquery.googleapis.com",
|
||||
|
|
|
@ -30,21 +30,6 @@ locals {
|
|||
"roles/storage.objectViewer",
|
||||
]
|
||||
}
|
||||
dwh_plg_group_iam = {
|
||||
(local.groups.data-engineers) = [
|
||||
"roles/bigquery.dataEditor",
|
||||
"roles/storage.admin",
|
||||
],
|
||||
(local.groups.data-analysts) = [
|
||||
"roles/bigquery.dataEditor",
|
||||
"roles/bigquery.jobUser",
|
||||
"roles/bigquery.metadataViewer",
|
||||
"roles/bigquery.user",
|
||||
"roles/datacatalog.viewer",
|
||||
"roles/datacatalog.tagTemplateViewer",
|
||||
"roles/storage.objectAdmin",
|
||||
]
|
||||
}
|
||||
dwh_lnd_iam = {
|
||||
"roles/bigquery.dataOwner" = [
|
||||
module.load-sa-df-0.iam_email,
|
||||
|
@ -140,21 +125,6 @@ module "dwh-conf-project" {
|
|||
}
|
||||
}
|
||||
|
||||
module "dwh-plg-project" {
|
||||
source = "../../../modules/project"
|
||||
parent = var.folder_id
|
||||
billing_account = var.billing_account_id
|
||||
prefix = var.prefix
|
||||
name = "dwh-plg${local.project_suffix}"
|
||||
group_iam = local.dwh_plg_group_iam
|
||||
iam = {}
|
||||
services = local.dwh_services
|
||||
service_encryption_key_ids = {
|
||||
bq = [try(local.service_encryption_keys.bq, null)]
|
||||
storage = [try(local.service_encryption_keys.storage, null)]
|
||||
}
|
||||
}
|
||||
|
||||
# Bigquery
|
||||
|
||||
module "dwh-lnd-bq-0" {
|
||||
|
@ -181,14 +151,6 @@ module "dwh-conf-bq-0" {
|
|||
encryption_key = try(local.service_encryption_keys.bq, null)
|
||||
}
|
||||
|
||||
module "dwh-plg-bq-0" {
|
||||
source = "../../../modules/bigquery-dataset"
|
||||
project_id = module.dwh-plg-project.project_id
|
||||
id = "${replace(var.prefix, "-", "_")}_dwh_plg_bq_0"
|
||||
location = var.location
|
||||
encryption_key = try(local.service_encryption_keys.bq, null)
|
||||
}
|
||||
|
||||
# Cloud storage
|
||||
|
||||
module "dwh-lnd-cs-0" {
|
||||
|
@ -223,14 +185,3 @@ module "dwh-conf-cs-0" {
|
|||
encryption_key = try(local.service_encryption_keys.storage, null)
|
||||
force_destroy = var.data_force_destroy
|
||||
}
|
||||
|
||||
module "dwh-plg-cs-0" {
|
||||
source = "../../../modules/gcs"
|
||||
project_id = module.dwh-plg-project.project_id
|
||||
prefix = var.prefix
|
||||
name = "dwh-plg-cs-0"
|
||||
location = var.location
|
||||
storage_class = "MULTI_REGIONAL"
|
||||
encryption_key = try(local.service_encryption_keys.storage, null)
|
||||
force_destroy = var.data_force_destroy
|
||||
}
|
||||
|
|
|
@ -57,14 +57,6 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
|||
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|
||||
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
|
||||
|
||||
## Project <i>dwh-plg</i>
|
||||
|
||||
| members | roles |
|
||||
|---|---|
|
||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||
|
||||
## Project <i>lod</i>
|
||||
|
||||
| members | roles |
|
||||
|
@ -80,7 +72,7 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
|||
| members | roles |
|
||||
|---|---|
|
||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/composer.ServiceAgentV2Ext](https://cloud.google.com/iam/docs/understanding-roles#composer.ServiceAgentV2Ext) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||
|
|
|
@ -21,7 +21,7 @@ The approach adapts to different high-level requirements:
|
|||
- least privilege principle
|
||||
- rely on service account impersonation
|
||||
|
||||
The code in this blueprint doesn't address Organization-level configurations (Organization policy, VPC-SC, centralized logs). We expect those elements to be managed by automation stages external to this script like those in [FAST](../../../fast).
|
||||
The code in this blueprint doesn't address Organization-level configurations (Organization policy, VPC-SC, centralized logs). We expect those elements to be managed by automation stages external to this script, like those in [FAST](../../../fast), with this blueprint deployed on top of them as one of the [stages](../../../fast/stages/03-data-platform/dev/README.md).
|
||||
|
||||
### Project structure
|
||||
|
||||
|
@ -39,14 +39,13 @@ This separation into projects allows adhering to the least-privilege principle b
|
|||
The script will create the following projects:
|
||||
|
||||
- **Drop off** Used to store temporary data. Data is pushed to Cloud Storage, BigQuery, or Cloud PubSub. Resources are configured with a customizable lifecycle policy.
|
||||
- **Load** Used to load data from the drop off zone to the data warehouse. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended.
|
||||
- **Load** Used to load data from the drop off zone to the data warehouse. The load is made with minimal to zero transformation logic (mainly `cast`). Anonymization or tokenization of Personally Identifiable Information (PII) can be implemented here or in the transformation stage, depending on your requirements. The use of [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates) is recommended. When you need to handle workloads from different teams and strong role separation is needed between them, we suggest customizing the script to have separate `Load` projects.
|
||||
- **Data Warehouse** Several projects distributed across 3 separate layers, to host progressively processed and refined data:
|
||||
- **Landing - Raw data** Structured Data, stored in relevant formats: structured data stored in BigQuery, unstructured data stored on Cloud Storage with additional metadata stored in BigQuery (for example pictures stored in Cloud Storage and analysis of the images for Cloud Vision API stored in BigQuery).
|
||||
- **Curated - Cleansed, aggregated and curated data**
|
||||
- **Confidential - Curated and unencrypted layer**
|
||||
- **Playground** Temporary tables that Data Analyst may use to perform R&D on data available in other Data Warehouse layers.
|
||||
- **Orchestration** Used to host Cloud Composer, which orchestrates all tasks that move data across layers.
|
||||
- **Transformation** Used to move data between Data Warehouse layers. We strongly suggest relying on BigQuery Engine to perform the transformations. If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII.
|
||||
- **Transformation** Used to move data between Data Warehouse layers. We strongly suggest relying on BigQuery Engine to perform the transformations. If BigQuery doesn't have the features needed to perform your transformations, you can use Cloud Dataflow with [Cloud Dataflow templates](https://cloud.google.com/dataflow/docs/concepts/dataflow-templates). This stage can also optionally anonymize or tokenize PII. When you need to handle workloads from different teams and strong role separation is needed between them, we suggest customizing the script to have separate `Transformation` projects.
|
||||
- **Exposure** Used to host resources that share processed data with external systems. Depending on the access pattern, data can be presented via Cloud SQL, BigQuery, or Bigtable. For BigQuery data, we strongly suggest relying on [Authorized views](https://cloud.google.com/bigquery/docs/authorized-views).
|
||||
|
||||
### Roles
|
||||
|
@ -80,10 +79,10 @@ We use three groups to control access to resources:
|
|||
|
||||
The table below shows a high level overview of roles for each group on each project, using `READ`, `WRITE` and `ADMIN` access patterns for simplicity. For detailed roles please refer to the code.
|
||||
|
||||
|Group|Drop off|Load|Transformation|DWH Landing|DWH Curated|DWH Confidential|DWH Playground|Orchestration|Common|
|
||||
|Group|Drop off|Load|Transformation|DWH Landing|DWH Curated|DWH Confidential|Orchestration|Common|
|
||||
|-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|
||||
|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|
|
||||
|Data Analysts|-|-|-|-|-|`READ`|`READ`/`WRITE`|-|-|
|
||||
|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|
|
||||
|Data Analysts|-|-|-|-|-|`READ`|-|-|
|
||||
|Data Security|-|-|-|-|-|-|-|-|`ADMIN`|
|
||||
|
||||
You can configure groups via the `groups` variable.
|
||||
|
@ -109,14 +108,13 @@ In both VPC scenarios, you also need these ranges for Composer:
|
|||
|
||||
- one /24 for Cloud SQL
|
||||
- one /28 for the GKE control plane
|
||||
- one /28 for the web server
|
||||
|
||||
### Resource naming conventions
|
||||
|
||||
Resources follow the naming convention described below.
|
||||
|
||||
- `prefix-layer` for projects
|
||||
- `prefix-layer-prduct` for resources
|
||||
- `prefix-layer-product` for resources
|
||||
- `prefix-layer[2]-gcp-product[2]-counter` for services and service accounts
|
||||
|
||||
### Encryption
|
||||
|
@ -221,7 +219,7 @@ module "data-platform" {
|
|||
prefix = "myprefix"
|
||||
}
|
||||
|
||||
# tftest modules=42 resources=316
|
||||
# tftest modules=39 resources=286
|
||||
```
|
||||
|
||||
## Customizations
|
||||
|
@ -247,31 +245,31 @@ You can find examples in the `[demo](./demo)` folder.
|
|||
| name | description | type | required | default |
|
||||
|---|---|:---:|:---:|:---:|
|
||||
| [billing_account_id](variables.tf#L17) | Billing account id. | <code>string</code> | ✓ | |
|
||||
| [folder_id](variables.tf#L53) | Folder to be used for the networking resources in folders/nnnn format. | <code>string</code> | ✓ | |
|
||||
| [organization_domain](variables.tf#L98) | Organization domain. | <code>string</code> | ✓ | |
|
||||
| [prefix](variables.tf#L103) | Prefix used for resource names. | <code>string</code> | ✓ | |
|
||||
| [composer_config](variables.tf#L22) | Cloud Composer config. | <code title="object({ node_count = number airflow_version = string env_variables = map(string) })">object({…})</code> | | <code title="{ node_count = 3 airflow_version = "composer-1-airflow-2" env_variables = {} }">{…}</code> |
|
||||
| [data_catalog_tags](variables.tf#L36) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> |
|
||||
| [data_force_destroy](variables.tf#L47) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
|
||||
| [groups](variables.tf#L58) | User groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> |
|
||||
| [location](variables.tf#L68) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> |
|
||||
| [network_config](variables.tf#L74) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | <code title="object({ host_project = string network_self_link = string subnet_self_links = object({ load = string transformation = string orchestration = string }) composer_ip_ranges = object({ cloudsql = string gke_master = string web_server = string }) composer_secondary_ranges = object({ pods = string services = string }) })">object({…})</code> | | <code>null</code> |
|
||||
| [project_services](variables.tf#L112) | List of core services enabled on all projects. | <code>list(string)</code> | | <code title="[ "cloudresourcemanager.googleapis.com", "iam.googleapis.com", "serviceusage.googleapis.com", "stackdriver.googleapis.com" ]">[…]</code> |
|
||||
| [project_suffix](variables.tf#L123) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
|
||||
| [region](variables.tf#L129) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> |
|
||||
| [service_encryption_keys](variables.tf#L135) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = string composer = string dataflow = string storage = string pubsub = string })">object({…})</code> | | <code>null</code> |
|
||||
| [folder_id](variables.tf#L122) | Folder to be used for the networking resources in folders/nnnn format. | <code>string</code> | ✓ | |
|
||||
| [organization_domain](variables.tf#L166) | Organization domain. | <code>string</code> | ✓ | |
|
||||
| [prefix](variables.tf#L171) | Prefix used for resource names. | <code>string</code> | ✓ | |
|
||||
| [composer_config](variables.tf#L22) | Cloud Composer config. | <code title="object({ disable_deployment = optional(bool) environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL") software_config = optional(object({ airflow_config_overrides = optional(any) pypi_packages = optional(any) env_variables = optional(map(string)) image_version = string }), { image_version = "composer-2-airflow-2" }) workloads_config = optional(object({ scheduler = optional(object( { cpu = number memory_gb = number storage_gb = number count = number } ), { cpu = 0.5 memory_gb = 1.875 storage_gb = 1 count = 1 }) web_server = optional(object( { cpu = number memory_gb = number storage_gb = number } ), { cpu = 0.5 memory_gb = 1.875 storage_gb = 1 }) worker = optional(object( { cpu = number memory_gb = number storage_gb = number min_count = number max_count = number } ), { cpu = 0.5 memory_gb = 1.875 storage_gb = 1 min_count = 1 max_count = 3 }) })) })">object({…})</code> | | <code title="{ environment_size = "ENVIRONMENT_SIZE_SMALL" software_config = { image_version = "composer-2-airflow-2" } workloads_config = { scheduler = { cpu = 0.5 memory_gb = 1.875 storage_gb = 1 count = 1 } web_server = { cpu = 0.5 memory_gb = 1.875 storage_gb = 1 } worker = { cpu = 0.5 memory_gb = 1.875 storage_gb = 1 min_count = 1 max_count = 3 } } }">{…}</code> |
|
||||
| [data_catalog_tags](variables.tf#L105) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> |
|
||||
| [data_force_destroy](variables.tf#L116) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
|
||||
| [groups](variables.tf#L127) | User groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> |
|
||||
| [location](variables.tf#L137) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> |
|
||||
| [network_config](variables.tf#L143) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | <code title="object({ host_project = string network_self_link = string subnet_self_links = object({ load = string transformation = string orchestration = string }) composer_ip_ranges = object({ cloudsql = string gke_master = string }) composer_secondary_ranges = object({ pods = string services = string }) })">object({…})</code> | | <code>null</code> |
|
||||
| [project_services](variables.tf#L180) | List of core services enabled on all projects. | <code>list(string)</code> | | <code title="[ "cloudresourcemanager.googleapis.com", "iam.googleapis.com", "serviceusage.googleapis.com", "stackdriver.googleapis.com" ]">[…]</code> |
|
||||
| [project_suffix](variables.tf#L191) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
|
||||
| [region](variables.tf#L197) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> |
|
||||
| [service_encryption_keys](variables.tf#L203) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = string composer = string dataflow = string storage = string pubsub = string })">object({…})</code> | | <code>null</code> |
|
||||
|
||||
## Outputs
|
||||
|
||||
| name | description | sensitive |
|
||||
|---|---|:---:|
|
||||
| [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | |
|
||||
| [demo_commands](outputs.tf#L28) | Demo commands. | |
|
||||
| [gcs-buckets](outputs.tf#L41) | GCS buckets. | |
|
||||
| [kms_keys](outputs.tf#L55) | Cloud MKS keys. | |
|
||||
| [projects](outputs.tf#L60) | GCP Projects informations. | |
|
||||
| [vpc_network](outputs.tf#L88) | VPC network. | |
|
||||
| [vpc_subnet](outputs.tf#L97) | VPC subnetworks. | |
|
||||
| [demo_commands](outputs.tf#L27) | Demo commands. Relevant only if Composer is deployed. | |
|
||||
| [gcs-buckets](outputs.tf#L40) | GCS buckets. | |
|
||||
| [kms_keys](outputs.tf#L53) | Cloud KMS keys. | |
|
||||
| [projects](outputs.tf#L58) | GCP projects information. | |
|
||||
| [vpc_network](outputs.tf#L84) | VPC network. | |
|
||||
| [vpc_subnet](outputs.tf#L93) | VPC subnetworks. | |
|
||||
|
||||
<!-- END TFDOC -->
|
||||
## TODOs
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 50 KiB After Width: | Height: | Size: 144 KiB |
|
@ -21,17 +21,16 @@ output "bigquery-datasets" {
|
|||
dwh-landing-bq-0 = module.dwh-lnd-bq-0.dataset_id,
|
||||
dwh-curated-bq-0 = module.dwh-cur-bq-0.dataset_id,
|
||||
dwh-confidential-bq-0 = module.dwh-conf-bq-0.dataset_id,
|
||||
dwh-plg-bq-0 = module.dwh-plg-bq-0.dataset_id,
|
||||
}
|
||||
}
|
||||
|
||||
output "demo_commands" {
|
||||
description = "Demo commands."
|
||||
description = "Demo commands. Relevant only if Composer is deployed."
|
||||
value = {
|
||||
01 = "gsutil -i ${module.drop-sa-cs-0.email} cp demo/data/*.csv gs://${module.drop-cs-0.name}"
|
||||
02 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/data/*.j* gs://${module.orch-cs-0.name}"
|
||||
03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/"
|
||||
04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG."
|
||||
02 = try("gsutil -i ${module.orch-sa-cmp-0.email} cp demo/data/*.j* gs://${module.orch-cs-0.name}", "Composer not deployed.")
|
||||
03 = try("gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0[0].config[0].dag_gcs_prefix}/", "Composer not deployed")
|
||||
04 = try("Open ${google_composer_environment.orch-cmp-0[0].config.0.airflow_uri} and run uploaded DAG.", "Composer not deployed")
|
||||
05 = <<EOT
|
||||
bq query --project_id=${module.dwh-conf-project.project_id} --use_legacy_sql=false 'SELECT * EXCEPT (name, surname) FROM `${module.dwh-conf-project.project_id}.${module.dwh-conf-bq-0.dataset_id}.customer_purchase` LIMIT 1000'"
|
||||
EOT
|
||||
|
@ -44,7 +43,6 @@ output "gcs-buckets" {
|
|||
dwh-landing-cs-0 = module.dwh-lnd-cs-0.name,
|
||||
dwh-curated-cs-0 = module.dwh-cur-cs-0.name,
|
||||
dwh-confidential-cs-0 = module.dwh-conf-cs-0.name,
|
||||
dwh-plg-cs-0 = module.dwh-plg-cs-0.name,
|
||||
drop-cs-0 = module.drop-cs-0.name,
|
||||
lod-cs-df = module.load-cs-df-0.name,
|
||||
orch-cs-0 = module.orch-cs-0.name,
|
||||
|
@ -64,7 +62,6 @@ output "projects" {
|
|||
dwh-landing = module.dwh-lnd-project.number,
|
||||
dwh-curated = module.dwh-cur-project.number,
|
||||
dwh-confidential = module.dwh-conf-project.number,
|
||||
dwh-plg = module.dwh-plg-project.number,
|
||||
exposure = module.exp-project.number,
|
||||
dropoff = module.drop-project.number,
|
||||
load = module.load-project.number,
|
||||
|
@ -75,7 +72,6 @@ output "projects" {
|
|||
dwh-landing = module.dwh-lnd-project.project_id,
|
||||
dwh-curated = module.dwh-cur-project.project_id,
|
||||
dwh-confidential = module.dwh-conf-project.project_id,
|
||||
dwh-plg = module.dwh-plg-project.project_id,
|
||||
exposure = module.exp-project.project_id,
|
||||
dropoff = module.drop-project.project_id,
|
||||
load = module.load-project.project_id,
|
||||
|
|
|
@ -22,14 +22,83 @@ variable "billing_account_id" {
|
|||
variable "composer_config" {
|
||||
description = "Cloud Composer config."
|
||||
type = object({
|
||||
node_count = number
|
||||
airflow_version = string
|
||||
env_variables = map(string)
|
||||
disable_deployment = optional(bool)
|
||||
environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL")
|
||||
software_config = optional(object({
|
||||
airflow_config_overrides = optional(any)
|
||||
pypi_packages = optional(any)
|
||||
env_variables = optional(map(string))
|
||||
image_version = string
|
||||
}), {
|
||||
image_version = "composer-2-airflow-2"
|
||||
})
|
||||
workloads_config = optional(object({
|
||||
scheduler = optional(object(
|
||||
{
|
||||
cpu = number
|
||||
memory_gb = number
|
||||
storage_gb = number
|
||||
count = number
|
||||
}
|
||||
), {
|
||||
cpu = 0.5
|
||||
memory_gb = 1.875
|
||||
storage_gb = 1
|
||||
count = 1
|
||||
})
|
||||
web_server = optional(object(
|
||||
{
|
||||
cpu = number
|
||||
memory_gb = number
|
||||
storage_gb = number
|
||||
}
|
||||
), {
|
||||
cpu = 0.5
|
||||
memory_gb = 1.875
|
||||
storage_gb = 1
|
||||
})
|
||||
worker = optional(object(
|
||||
{
|
||||
cpu = number
|
||||
memory_gb = number
|
||||
storage_gb = number
|
||||
min_count = number
|
||||
max_count = number
|
||||
}
|
||||
), {
|
||||
cpu = 0.5
|
||||
memory_gb = 1.875
|
||||
storage_gb = 1
|
||||
min_count = 1
|
||||
max_count = 3
|
||||
})
|
||||
}))
|
||||
})
|
||||
default = {
|
||||
node_count = 3
|
||||
airflow_version = "composer-1-airflow-2"
|
||||
env_variables = {}
|
||||
environment_size = "ENVIRONMENT_SIZE_SMALL"
|
||||
software_config = {
|
||||
image_version = "composer-2-airflow-2"
|
||||
}
|
||||
workloads_config = {
|
||||
scheduler = {
|
||||
cpu = 0.5
|
||||
memory_gb = 1.875
|
||||
storage_gb = 1
|
||||
count = 1
|
||||
}
|
||||
web_server = {
|
||||
cpu = 0.5
|
||||
memory_gb = 1.875
|
||||
storage_gb = 1
|
||||
}
|
||||
worker = {
|
||||
cpu = 0.5
|
||||
memory_gb = 1.875
|
||||
storage_gb = 1
|
||||
min_count = 1
|
||||
max_count = 3
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -84,7 +153,6 @@ variable "network_config" {
|
|||
composer_ip_ranges = object({
|
||||
cloudsql = string
|
||||
gke_master = string
|
||||
web_server = string
|
||||
})
|
||||
composer_secondary_ranges = object({
|
||||
pods = string
|
||||
|
|
|
@ -31,13 +31,13 @@ The Data Platform manages:
|
|||
As per our GCP best practices the Data Platform relies on user groups to assign roles to human identities. These are the specific groups used by the Data Platform and their access patterns, from the [module documentation](../../../../blueprints/data-solutions/data-platform-foundations/#groups):
|
||||
|
||||
- *Data Engineers*. They handle and run the Data Hub, with read access to all resources in order to troubleshoot possible issues with pipelines. This team can also impersonate any service account.
|
||||
- *Data Analysts*. They perform analysis on datasets, with read access to the data warehouse Curated or Confidential projects depending on their privileges, and BigQuery READ/WRITE access to the playground project.
|
||||
- *Data Analysts*. They perform analysis on datasets, with read access to the data warehouse Curated or Confidential projects depending on their privileges.
|
||||
- *Data Security*. They handle security configurations related to the Data Hub. This team has admin access to the common project to configure Cloud DLP templates or Data Catalog policy tags.
|
||||
|
||||
|Group|Landing|Load|Transformation|Data Warehouse Landing|Data Warehouse Curated|Data Warehouse Confidential|Data Warehouse Playground|Orchestration|Common|
|
||||
|-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|
||||
|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|
|
||||
|Data Analysts|-|-|-|-|-|`READ`|`READ`/`WRITE`|-|-|
|
||||
|Group|Landing|Load|Transformation|Data Warehouse Landing|Data Warehouse Curated|Data Warehouse Confidential|Orchestration|Common|
|
||||
|-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|
||||
|Data Engineers|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|`ADMIN`|
|
||||
|Data Analysts|-|-|-|-|-|`READ`|-|-|
|
||||
|Data Security|-|-|-|-|-|-|-|`ADMIN`|
|
||||
|
||||
### Network
|
||||
|
@ -168,22 +168,22 @@ You can find examples in the `[demo](../../../../blueprints/data-solutions/data-
|
|||
|---|---|:---:|:---:|:---:|:---:|
|
||||
| [automation](variables.tf#L17) | Automation resources created by the bootstrap stage. | <code title="object({ outputs_bucket = string })">object({…})</code> | ✓ | | <code>00-bootstrap</code> |
|
||||
| [billing_account](variables.tf#L25) | Billing account id and organization id ('nnnnnnnn' or null). | <code title="object({ id = string organization_id = number })">object({…})</code> | ✓ | | <code>00-globals</code> |
|
||||
| [folder_ids](variables.tf#L65) | Folder to be used for the networking resources in folders/nnnn format. | <code title="object({ data-platform-dev = string })">object({…})</code> | ✓ | | <code>01-resman</code> |
|
||||
| [host_project_ids](variables.tf#L83) | Shared VPC project ids. | <code title="object({ dev-spoke-0 = string })">object({…})</code> | ✓ | | <code>02-networking</code> |
|
||||
| [organization](variables.tf#L115) | Organization details. | <code title="object({ domain = string id = number customer_id = string })">object({…})</code> | ✓ | | <code>00-globals</code> |
|
||||
| [prefix](variables.tf#L131) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | <code>string</code> | ✓ | | <code>00-globals</code> |
|
||||
| [composer_config](variables.tf#L34) | Cloud Composer configuration options. | <code title="object({ node_count = number airflow_version = string env_variables = map(string) })">object({…})</code> | | <code title="{ node_count = 3 airflow_version = "composer-1.17.5-airflow-2.1.4" env_variables = {} }">{…}</code> | |
|
||||
| [data_catalog_tags](variables.tf#L48) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> | |
|
||||
| [data_force_destroy](variables.tf#L59) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | <code>bool</code> | | <code>false</code> | |
|
||||
| [groups](variables.tf#L73) | Groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> | |
|
||||
| [location](variables.tf#L91) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> | |
|
||||
| [network_config_composer](variables.tf#L97) | Network configurations to use for Composer. | <code title="object({ cloudsql_range = string gke_master_range = string gke_pods_name = string gke_services_name = string web_server_range = string })">object({…})</code> | | <code title="{ cloudsql_range = "192.168.254.0/24" gke_master_range = "192.168.255.0/28" gke_pods_name = "pods" gke_services_name = "services" web_server_range = "192.168.255.16/28" }">{…}</code> | |
|
||||
| [outputs_location](variables.tf#L125) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | <code>string</code> | | <code>null</code> | |
|
||||
| [project_services](variables.tf#L137) | List of core services enabled on all projects. | <code>list(string)</code> | | <code title="[ "cloudresourcemanager.googleapis.com", "iam.googleapis.com", "serviceusage.googleapis.com", "stackdriver.googleapis.com" ]">[…]</code> | |
|
||||
| [region](variables.tf#L148) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> | |
|
||||
| [service_encryption_keys](variables.tf#L154) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = string composer = string dataflow = string storage = string pubsub = string })">object({…})</code> | | <code>null</code> | |
|
||||
| [subnet_self_links](variables.tf#L166) | Shared VPC subnet self links. | <code title="object({ dev-spoke-0 = map(string) })">object({…})</code> | | <code>null</code> | <code>02-networking</code> |
|
||||
| [vpc_self_links](variables.tf#L175) | Shared VPC self links. | <code title="object({ dev-spoke-0 = string })">object({…})</code> | | <code>null</code> | <code>02-networking</code> |
|
||||
| [folder_ids](variables.tf#L98) | Folder to be used for the networking resources in folders/nnnn format. | <code title="object({ data-platform-dev = string })">object({…})</code> | ✓ | | <code>01-resman</code> |
|
||||
| [host_project_ids](variables.tf#L116) | Shared VPC project ids. | <code title="object({ dev-spoke-0 = string })">object({…})</code> | ✓ | | <code>02-networking</code> |
|
||||
| [organization](variables.tf#L146) | Organization details. | <code title="object({ domain = string id = number customer_id = string })">object({…})</code> | ✓ | | <code>00-globals</code> |
|
||||
| [prefix](variables.tf#L162) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | <code>string</code> | ✓ | | <code>00-globals</code> |
|
||||
| [composer_config](variables.tf#L34) | Cloud Composer configuration options. | <code title="object({ disable_deployment = optional(bool) environment_size = string software_config = object({ airflow_config_overrides = optional(any) pypi_packages = optional(any) env_variables = optional(map(string)) image_version = string }) workloads_config = object({ scheduler = object( { cpu = number memory_gb = number storage_gb = number count = number } ) web_server = object( { cpu = number memory_gb = number storage_gb = number } ) worker = object( { cpu = number memory_gb = number storage_gb = number min_count = number max_count = number } ) }) })">object({…})</code> | | <code title="{ environment_size = "ENVIRONMENT_SIZE_SMALL" software_config = { image_version = "composer-2-airflow-2" } workloads_config = null }">{…}</code> | |
|
||||
| [data_catalog_tags](variables.tf#L81) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> | |
|
||||
| [data_force_destroy](variables.tf#L92) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | <code>bool</code> | | <code>false</code> | |
|
||||
| [groups](variables.tf#L106) | Groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> | |
|
||||
| [location](variables.tf#L124) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> | |
|
||||
| [network_config_composer](variables.tf#L130) | Network configurations to use for Composer. | <code title="object({ cloudsql_range = string gke_master_range = string gke_pods_name = string gke_services_name = string })">object({…})</code> | | <code title="{ cloudsql_range = "192.168.254.0/24" gke_master_range = "192.168.255.0/28" gke_pods_name = "pods" gke_services_name = "services" }">{…}</code> | |
|
||||
| [outputs_location](variables.tf#L156) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | <code>string</code> | | <code>null</code> | |
|
||||
| [project_services](variables.tf#L168) | List of core services enabled on all projects. | <code>list(string)</code> | | <code title="[ "cloudresourcemanager.googleapis.com", "iam.googleapis.com", "serviceusage.googleapis.com", "stackdriver.googleapis.com" ]">[…]</code> | |
|
||||
| [region](variables.tf#L179) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> | |
|
||||
| [service_encryption_keys](variables.tf#L185) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = string composer = string dataflow = string storage = string pubsub = string })">object({…})</code> | | <code>null</code> | |
|
||||
| [subnet_self_links](variables.tf#L197) | Shared VPC subnet self links. | <code title="object({ dev-spoke-0 = map(string) })">object({…})</code> | | <code>null</code> | <code>02-networking</code> |
|
||||
| [vpc_self_links](variables.tf#L206) | Shared VPC self links. | <code title="object({ dev-spoke-0 = string })">object({…})</code> | | <code>null</code> | <code>02-networking</code> |
|
||||
|
||||
## Outputs
|
||||
|
||||
|
|
|
@ -37,7 +37,6 @@ module "data-platform" {
|
|||
composer_ip_ranges = {
|
||||
cloudsql = var.network_config_composer.cloudsql_range
|
||||
gke_master = var.network_config_composer.gke_master_range
|
||||
web_server = var.network_config_composer.web_server_range
|
||||
}
|
||||
composer_secondary_ranges = {
|
||||
pods = var.network_config_composer.gke_pods_name
|
||||
|
|
|
@ -34,14 +34,47 @@ variable "billing_account" {
|
|||
variable "composer_config" {
|
||||
description = "Cloud Composer configuration options."
|
||||
type = object({
|
||||
node_count = number
|
||||
airflow_version = string
|
||||
env_variables = map(string)
|
||||
disable_deployment = optional(bool)
|
||||
environment_size = string
|
||||
software_config = object({
|
||||
airflow_config_overrides = optional(any)
|
||||
pypi_packages = optional(any)
|
||||
env_variables = optional(map(string))
|
||||
image_version = string
|
||||
})
|
||||
workloads_config = object({
|
||||
scheduler = object(
|
||||
{
|
||||
cpu = number
|
||||
memory_gb = number
|
||||
storage_gb = number
|
||||
count = number
|
||||
}
|
||||
)
|
||||
web_server = object(
|
||||
{
|
||||
cpu = number
|
||||
memory_gb = number
|
||||
storage_gb = number
|
||||
}
|
||||
)
|
||||
worker = object(
|
||||
{
|
||||
cpu = number
|
||||
memory_gb = number
|
||||
storage_gb = number
|
||||
min_count = number
|
||||
max_count = number
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
default = {
|
||||
node_count = 3
|
||||
airflow_version = "composer-1.17.5-airflow-2.1.4"
|
||||
env_variables = {}
|
||||
environment_size = "ENVIRONMENT_SIZE_SMALL"
|
||||
software_config = {
|
||||
image_version = "composer-2-airflow-2"
|
||||
}
|
||||
workloads_config = null
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -101,14 +134,12 @@ variable "network_config_composer" {
|
|||
gke_master_range = string
|
||||
gke_pods_name = string
|
||||
gke_services_name = string
|
||||
web_server_range = string
|
||||
})
|
||||
default = {
|
||||
cloudsql_range = "192.168.254.0/24"
|
||||
gke_master_range = "192.168.255.0/28"
|
||||
gke_pods_name = "pods"
|
||||
gke_services_name = "services"
|
||||
web_server_range = "192.168.255.16/28"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,5 +21,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture')
|
|||
def test_resources(e2e_plan_runner):
|
||||
"Test that plan works and the numbers of resources is as expected."
|
||||
modules, resources = e2e_plan_runner(FIXTURES_DIR)
|
||||
assert len(modules) == 41
|
||||
assert len(resources) == 315
|
||||
assert len(modules) == 38
|
||||
assert len(resources) == 285
|
||||
|
|
Loading…
Reference in New Issue