diff --git a/data-solutions/README.md b/data-solutions/README.md
index 33e22153..af35f8df 100644
--- a/data-solutions/README.md
+++ b/data-solutions/README.md
@@ -19,5 +19,5 @@ All resources use CMEK hosted in Cloud KMS running in a centralized project. The
### Data Platform Foundations
-This [example](./data-platform-foundations/) implements a **robust and flexible** Data Foundation on GCP that provides **opinionated defaults** while allowing customers to **build and scale** out additional data pipelines **quickly and reliably**.
+This [example](./data-platform-foundations/) implements a robust and flexible Data Foundation on GCP that provides opinionated defaults, allowing customers to build and scale out additional data pipelines quickly and reliably.
diff --git a/data-solutions/data-platform-foundations/environment/README.md b/data-solutions/data-platform-foundations/environment/README.md
index 824df21a..e4885932 100644
--- a/data-solutions/data-platform-foundations/environment/README.md
+++ b/data-solutions/data-platform-foundations/environment/README.md
@@ -10,7 +10,11 @@ The projects that will be created are:
- DWH
- Datamart
-A master service account named `projects-editor-sa` will be created under the common services project, and it will be granted editor permissions on all the projects in scope.
+A main service account named `projects-editor-sa` will be created under the common services project, and it will be granted editor permissions on all the projects in scope.
+
+This is a high-level diagram of the created resources:
+
+![Environment - Phase 1](./diagram.png)
## Running the example
@@ -26,7 +30,7 @@ parent = "folders/12345678"
- make sure you have the right authentication setup (application default credentials, or a service account key)
- run `terraform init` and `terraform apply`
-Once done testing, you can clean up resources by running `terraform destroy`
+Once done testing, you can clean up resources by running `terraform destroy`.
## Variables
@@ -44,4 +48,5 @@ Once done testing, you can clean up resources by running `terraform destroy`
| name | description | sensitive |
|---|---|:---:|
| project_ids | Project ids for created projects. | |
+| service_account | Main service account. | |
diff --git a/data-solutions/data-platform-foundations/environment/outputs.tf b/data-solutions/data-platform-foundations/environment/outputs.tf
index 0919bdd0..b13d8fe0 100644
--- a/data-solutions/data-platform-foundations/environment/outputs.tf
+++ b/data-solutions/data-platform-foundations/environment/outputs.tf
@@ -24,3 +24,8 @@ output "project_ids" {
transformation = module.project-transformation.project_id
}
}
+
+output "service_account" {
+ description = "Main service account."
+ value = module.sa-services-main.email
+}
diff --git a/data-solutions/data-platform-foundations/resources/README.md b/data-solutions/data-platform-foundations/resources/README.md
index 0cc74f5f..01c0f96e 100644
--- a/data-solutions/data-platform-foundations/resources/README.md
+++ b/data-solutions/data-platform-foundations/resources/README.md
@@ -1,12 +1,10 @@
-# Data Platform Foundations - Phase 2
+# Data Platform Foundations - Resources (Step 2)
-## General
-
-This is the second step needed to deploy Data Platform Foundations, which creates resources needed to store and process the data in projects created in the [environment step](./../environment/). Please refer to [top-level Data Platform README](../README.md) for prerequisites and how to run the first step.
+This is the second step needed to deploy Data Platform Foundations. It creates the resources needed to store and process data in the projects created in the [previous step](./../environment/). Please refer to the [top-level README](../README.md) for prerequisites and how to run the first step.
![Data Foundation - Phase 2](./diagram.png)
-This example will create the next resources per project:
+The resources that will be created in each project are:
- Common
- Landing
@@ -24,59 +22,49 @@ This example will create the next resources per project:
## Running the example
-You can run Teraform script with the Service Account created in the first step.
-
-To create the infrastructure:
+To create the resources, you can leverage the service account created in the previous step:
- Specify your variables in a `terraform.tvars`
```tfm
project_ids = {
- datamart = "DATAMART-PROJECT_ID"
- dwh = "DWH-PROJECT_ID"
- landing = "LANDING-PROJECT_ID"
- services = "SERVICES-PROJECT_ID"
- transformation = "TRANSFORMATION-PROJECT_ID"
+ datamart = "datamart-project_id"
+ dwh = "dwh-project_id"
+ landing = "landing-project_id"
+ services = "services-project_id"
+ transformation = "transformation-project_id"
}
```
-- Place the data_service_account_name service account (the service account was created in phase 1) key in the terraform folder
-- Go through the following steps to create resources:
+- make sure you have the right authentication setup (application default credentials, or a service account key)
+- run `terraform init` and `terraform apply`
-```bash
-terraform init
-terraform apply
-```
-
-Once done testing, you can clean up resources by running:
-
-```bash
-terraform destroy
-```
+Once done testing, you can clean up resources by running `terraform destroy`.
## Variables
-| Name | Description | Type | Required | Default|
-|------|-------------|------|:--------:|:--------:|
-| [datamart\_bq\_datasets](#Variables\_datamart\_bq\_datasets) | Datamart Bigquery datasets | `object({...})` | | `...` |
-| [dwh\_bq\_datasets](#Variables\_dwh\_bq\_datasets) | DWH Bigquery datasets | `object({...})` | | `...` |
-| [landing\_buckets](#Variables\_landing\_buckets) | List of landing buckets to create | `object({...})` | | `...` |
-| [landing\_pubsub](#Variables\_landing\_pubsub) | List of landing pubsub topics and subscriptions to create | `object({...})` | | `...` |
-| [landing\_service\_account](#Variables\_landing\_service\_account) | landing service accounts list. | `string` | | `sa-landing` |
-| [project\_ids](#Variables\_project\_ids) | Project IDs. | `object({...})` | ✓ | n/a |
-| [project\_service\_account](#Variables\_project\_service\_account) | Project service accounts list. | `object({...})` | | `...` |
-| [transformation\_buckets](#Variables\_transformation\_buckets) | List of transformation buckets to create | `object({...})` | | `...` |
-| [transformation\_subnets](#Variables\_transformation\_subnets) | List of subnets to create in the transformation Project. | `object({...})` | | `...` |
-| [transformation\_vpc\_name](#Variables\_transformation\_vpc\_name) | Name of the VPC created in the transformation Project. | `string` | | `transformation-vpc` |
+| name | description | type | required | default |
+|---|---|:---: |:---:|:---:|
+| project_ids | Project IDs. | <code>object({...})</code> | ✓ | |
+| *datamart_bq_datasets* | Datamart Bigquery datasets | <code>map(object({...}))</code> | | <code>...</code> |
+| *dwh_bq_datasets* | DWH Bigquery datasets | <code>map(object({...}))</code> | | <code>...</code> |
+| *landing_buckets* | List of landing buckets to create | <code>map(object({...}))</code> | | <code>...</code> |
+| *landing_pubsub* | List of landing pubsub topics and subscriptions to create | <code>map(map(object({...})))</code> | | <code>...</code> |
+| *landing_service_account* | landing service accounts list. | <code>string</code> | | <code>sa-landing</code> |
+| *service_account_names* | Project service accounts list. | <code>object({...})</code> | | <code>...</code> |
+| *transformation_buckets* | List of transformation buckets to create | <code>map(object({...}))</code> | | <code>...</code> |
+| *transformation_subnets* | List of subnets to create in the transformation Project. | <code>list(object({...}))</code> | | <code>...</code> |
+| *transformation_vpc_name* | Name of the VPC created in the transformation Project. | <code>string</code> | | <code>transformation-vpc</code> |
## Outputs
-| Name | Description |
-|------|-------------|
-| [datamart-bigquery-datasets-list](#output\_datamart-bigquery-datasets-list) | List of bigquery datasets created for the datamart project |
-| [dwh-bigquery-datasets-list](#output\_dwh-bigquery-datasets-list) | List of bigquery datasets created for the dwh project |
-| [landing-bucket-names](#output\_landing-bucket-names) | List of buckets created for the landing project |
-| [landing-pubsub-list](#output\_landing-pubsub-list) | List of pubsub topics and subscriptions created for the landing project |
-| [transformation-bucket-names](#output\_transformation-bucket-names) | List of buckets created for the transformation project |
-| [transformation-vpc-info](#output\_transformation-vpc-info) | Transformation VPC details |
+| name | description | sensitive |
+|---|---|:---:|
+| datamart-datasets | List of bigquery datasets created for the datamart project. | |
+| dwh-datasets | List of bigquery datasets created for the dwh project. | |
+| landing-buckets | List of buckets created for the landing project. | |
+| landing-pubsub | List of pubsub topics and subscriptions created for the landing project. | |
+| transformation-buckets | List of buckets created for the transformation project. | |
+| transformation-vpc | Transformation VPC details | |
+
diff --git a/data-solutions/data-platform-foundations/resources/main.tf b/data-solutions/data-platform-foundations/resources/main.tf
index 79ec2b0b..19816cbc 100644
--- a/data-solutions/data-platform-foundations/resources/main.tf
+++ b/data-solutions/data-platform-foundations/resources/main.tf
@@ -14,130 +14,49 @@
* limitations under the License.
*/
-locals {
- landing_pubsub = merge({
- for k, v in var.landing_pubsub :
- k => {
- name = v.name
- subscriptions = v.subscriptions
- subscription_iam = merge({
- for s_k, s_v in v.subscription_iam :
- s_k => merge(s_v, { "roles/pubsub.subscriber" : ["serviceAccount:${module.transformation-default-service-accounts.email}"] })
- })
- }
- })
-
- datamart_bq_datasets = merge({
- for k, v in var.datamart_bq_datasets :
- k => {
- id = v.id
- location = v.location
- iam = merge({
- for s_k, s_v in v.iam :
- s_k => s_k == "roles/bigquery.dataOwner" ? concat(s_v, ["serviceAccount:${module.datamart-default-service-accounts.email}"]) : s_v
- })
- }
- })
-
- dwh_bq_datasets = merge({
- for k, v in var.dwh_bq_datasets :
- k => {
- id = v.id
- location = v.location
- iam = merge({
- for s_k, s_v in v.iam :
- s_k => s_k == "roles/bigquery.dataOwner" ? concat(s_v, ["serviceAccount:${module.dwh-default-service-accounts.email}", "serviceAccount:${module.transformation-default-service-accounts.email}"]) : s_v
- })
- }
- })
-}
-
-###############################################################################
-# Projects #
-###############################################################################
-module "project-datamart" {
- source = "../../../modules/project"
- name = var.project_ids.datamart
- project_create = false
-}
-
-module "project-dwh" {
- source = "../../../modules/project"
- name = var.project_ids.dwh
- project_create = false
-}
-
-module "project-landing" {
- source = "../../../modules/project"
- name = var.project_ids.landing
- project_create = false
-}
-
-module "project-services" {
- source = "../../../modules/project"
- name = var.project_ids.services
- project_create = false
-}
-
-module "project-transformation" {
- source = "../../../modules/project"
- name = var.project_ids.transformation
- project_create = false
-}
-
###############################################################################
# IAM #
###############################################################################
-module "datamart-default-service-accounts" {
+module "datamart-sa" {
source = "../../../modules/iam-service-account"
- project_id = module.project-datamart.project_id
- name = var.project_service_account.datamart
-
+ project_id = var.project_ids.datamart
+ name = var.service_account_names.datamart
iam_project_roles = {
- "${module.project-datamart.project_id}" = [
- "roles/editor",
- ]
+ "${var.project_ids.datamart}" = ["roles/editor"]
}
}
-module "dwh-default-service-accounts" {
+module "dwh-sa" {
source = "../../../modules/iam-service-account"
- project_id = module.project-dwh.project_id
- name = var.project_service_account.dwh
+ project_id = var.project_ids.dwh
+ name = var.service_account_names.dwh
}
-module "landing-default-service-accounts" {
+module "landing-sa" {
source = "../../../modules/iam-service-account"
- project_id = module.project-landing.project_id
- name = var.project_service_account.landing
-
+ project_id = var.project_ids.landing
+ name = var.service_account_names.landing
iam_project_roles = {
- "${module.project-landing.project_id}" = [
- "roles/pubsub.publisher",
- ]
+ "${var.project_ids.landing}" = ["roles/pubsub.publisher"]
}
}
-module "services-default-service-accounts" {
+module "services-sa" {
source = "../../../modules/iam-service-account"
- project_id = module.project-services.project_id
- name = var.project_service_account.services
-
+ project_id = var.project_ids.services
+ name = var.service_account_names.services
iam_project_roles = {
- "${module.project-services.project_id}" = [
- "roles/editor",
- ]
+ "${var.project_ids.services}" = ["roles/editor"]
}
}
-module "transformation-default-service-accounts" {
+module "transformation-sa" {
source = "../../../modules/iam-service-account"
- project_id = module.project-transformation.project_id
- name = var.project_service_account.transformation
-
+ project_id = var.project_ids.transformation
+ name = var.service_account_names.transformation
iam_project_roles = {
- "${module.project-transformation.project_id}" = [
+ "${var.project_ids.transformation}" = [
"roles/logging.logWriter",
"roles/monitoring.metricWriter",
"roles/dataflow.admin",
@@ -155,30 +74,28 @@ module "transformation-default-service-accounts" {
# GCS #
###############################################################################
-module "bucket-landing" {
+module "landing-buckets" {
source = "../../../modules/gcs"
- project_id = module.project-landing.project_id
+ for_each = var.landing_buckets
+ project_id = var.project_ids.landing
prefix = var.project_ids.landing
+ name = each.value.name
+ location = each.value.location
iam = {
- "roles/storage.objectCreator" = ["serviceAccount:${module.landing-default-service-accounts.email}"],
- "roles/storage.admin" = ["serviceAccount:${module.transformation-default-service-accounts.email}"],
+ "roles/storage.objectCreator" = [module.landing-sa.iam_email]
+ "roles/storage.admin" = [module.transformation-sa.iam_email]
}
-
- for_each = var.landing_buckets
- name = each.value.name
- location = each.value.location
}
-module "bucket-transformation" {
+module "transformation-buckets" {
source = "../../../modules/gcs"
- project_id = module.project-transformation.project_id
+ for_each = var.transformation_buckets
+ project_id = var.project_ids.transformation
prefix = var.project_ids.transformation
-
- for_each = var.transformation_buckets
- name = each.value.name
- location = each.value.location
+ name = each.value.name
+ location = each.value.location
iam = {
- "roles/storage.admin" = ["serviceAccount:${module.transformation-default-service-accounts.email}"],
+ "roles/storage.admin" = [module.transformation-sa.iam_email]
}
}
@@ -186,24 +103,34 @@ module "bucket-transformation" {
# Bigquery #
###############################################################################
-module "bigquery-datasets-datamart" {
+module "datamart-bq" {
source = "../../../modules/bigquery-dataset"
- project_id = module.project-datamart.project_id
-
- for_each = local.datamart_bq_datasets
- id = each.value.id
- location = each.value.location
- iam = each.value.iam
+ for_each = var.datamart_bq_datasets
+ project_id = var.project_ids.datamart
+ id = each.key
+ location = each.value.location
+ iam = {
+ for k, v in each.value.iam : k => (
+ k == "roles/bigquery.dataOwner"
+ ? concat(v, [module.datamart-sa.iam_email])
+ : v
+ )
+ }
}
-module "bigquery-datasets-dwh" {
+module "dwh-bq" {
source = "../../../modules/bigquery-dataset"
- project_id = module.project-dwh.project_id
-
- for_each = local.dwh_bq_datasets
- id = each.value.id
- location = each.value.location
- iam = each.value.iam
+ for_each = var.dwh_bq_datasets
+ project_id = var.project_ids.dwh
+ id = each.key
+ location = each.value.location
+ iam = {
+ for k, v in each.value.iam : k => (
+ k == "roles/bigquery.dataOwner"
+ ? concat(v, [module.dwh-sa.iam_email])
+ : v
+ )
+ }
}
###############################################################################
@@ -211,7 +138,7 @@ module "bigquery-datasets-dwh" {
###############################################################################
module "vpc-transformation" {
source = "../../../modules/net-vpc"
- project_id = module.project-transformation.project_id
+ project_id = var.project_ids.transformation
name = var.transformation_vpc_name
subnets = var.transformation_subnets
}
@@ -219,12 +146,18 @@ module "vpc-transformation" {
###############################################################################
# Pub/Sub #
###############################################################################
-module "pubsub-landing" {
- source = "../../../modules/pubsub"
- project_id = module.project-landing.project_id
- for_each = local.landing_pubsub
- name = each.value.name
- subscriptions = each.value.subscriptions
- subscription_iam = each.value.subscription_iam
+module "landing-pubsub" {
+ source = "../../../modules/pubsub"
+ for_each = var.landing_pubsub
+ project_id = var.project_ids.landing
+ name = each.key
+ subscriptions = {
+ for k, v in each.value : k => { labels = v.labels, options = v.options }
+ }
+ subscription_iam = {
+ for k, v in each.value : k => merge(v.iam, {
+ "roles/pubsub.subscriber" = [module.transformation-sa.iam_email]
+ })
+ }
}
diff --git a/data-solutions/data-platform-foundations/resources/outputs.tf b/data-solutions/data-platform-foundations/resources/outputs.tf
index e0f51e81..3023587d 100644
--- a/data-solutions/data-platform-foundations/resources/outputs.tf
+++ b/data-solutions/data-platform-foundations/resources/outputs.tf
@@ -14,69 +14,46 @@
* limitations under the License.
*/
+output "datamart-datasets" {
+ description = "List of bigquery datasets created for the datamart project."
+ value = [
+ for k, datasets in module.datamart-bq : datasets.dataset_id
+ ]
+}
-###############################################################################
-# Network #
-###############################################################################
+output "dwh-datasets" {
+ description = "List of bigquery datasets created for the dwh project."
+ value = [for k, datasets in module.dwh-bq : datasets.dataset_id]
+}
-output "transformation-vpc-info" {
+output "landing-buckets" {
+ description = "List of buckets created for the landing project."
+ value = [for k, bucket in module.landing-buckets : bucket.name]
+}
+
+output "landing-pubsub" {
+ description = "List of pubsub topics and subscriptions created for the landing project."
+ value = {
+ for t in module.landing-pubsub : t.topic.name => {
+ id = t.topic.id
+ subscriptions = { for s in t.subscriptions : s.name => s.id }
+ }
+ }
+}
+
+output "transformation-buckets" {
+ description = "List of buckets created for the transformation project."
+ value = [for k, bucket in module.transformation-buckets : bucket.name]
+}
+
+output "transformation-vpc" {
description = "Transformation VPC details"
value = {
name = module.vpc-transformation.name
subnets = {
- for s in module.vpc-transformation.subnets : s.name => {
- gateway_address = s.gateway_address
- ip_cidr_range = s.ip_cidr_range
- private_ip_google_access = s.private_ip_google_access
- region = s.region
- }
- }
- }
-}
-
-###############################################################################
-# GCS #
-###############################################################################
-
-output "landing-bucket-names" {
- description = "List of buckets created for the landing project"
- value = [for k, bucket in module.bucket-landing : "${bucket.name}"]
-}
-
-output "transformation-bucket-names" {
- description = "List of buckets created for the transformation project"
- value = [for k, bucket in module.bucket-transformation : "${bucket.name}"]
-}
-
-###############################################################################
-# Bigquery #
-###############################################################################
-
-output "dwh-bigquery-datasets-list" {
- description = "List of bigquery datasets created for the dwh project"
- value = [for k, datasets in module.bigquery-datasets-dwh : "${datasets.dataset_id}"]
-}
-
-output "datamart-bigquery-datasets-list" {
- description = "List of bigquery datasets created for the datamart project"
- value = [for k, datasets in module.bigquery-datasets-datamart : "${datasets.dataset_id}"]
-}
-
-###############################################################################
-# Pub/Sub #
-###############################################################################
-
-output "landing-pubsub-list" {
- description = "List of pubsub topics and subscriptions created for the landing project"
- value = {
- for t in module.pubsub-landing : t.topic.name => {
- name = t.topic.name
- id = t.topic.id
- subscriptions = {
- for s in t.subscriptions : s.name => {
- name = s.name
- id = s.id
- }
+ for k, s in module.vpc-transformation.subnets : k => {
+ ip_cidr_range = s.ip_cidr_range
+ region = s.region
}
}
}
diff --git a/data-solutions/data-platform-foundations/resources/variables.tf b/data-solutions/data-platform-foundations/resources/variables.tf
index 3f599b5d..bd139724 100644
--- a/data-solutions/data-platform-foundations/resources/variables.tf
+++ b/data-solutions/data-platform-foundations/resources/variables.tf
@@ -14,15 +14,17 @@
variable "datamart_bq_datasets" {
description = "Datamart Bigquery datasets"
- type = map(any)
+ type = map(object({
+ iam = map(list(string))
+ location = string
+ }))
default = {
bq_datamart_dataset = {
- id = "bq_datamart_dataset"
location = "EU"
iam = {
- "roles/bigquery.dataOwner" = []
- "roles/bigquery.dataEditor" = []
- "roles/bigquery.dataViewer" = []
+ # "roles/bigquery.dataOwner" = []
+ # "roles/bigquery.dataEditor" = []
+ # "roles/bigquery.dataViewer" = []
}
}
}
@@ -30,69 +32,62 @@ variable "datamart_bq_datasets" {
variable "dwh_bq_datasets" {
description = "DWH Bigquery datasets"
- type = map(any)
+ type = map(object({
+ location = string
+ iam = map(list(string))
+ }))
default = {
bq_raw_dataset = {
- id = "bq_raw_dataset"
+ iam = {}
location = "EU"
- iam = {
- "roles/bigquery.dataOwner" = []
- "roles/bigquery.dataEditor" = []
- "roles/bigquery.dataViewer" = []
- }
}
}
}
variable "landing_buckets" {
description = "List of landing buckets to create"
- type = map(any)
+ type = map(object({
+ location = string
+ name = string
+ }))
default = {
raw-data = {
+ location = "EU"
name = "raw-data"
- location = "EU"
- },
+ }
data-schema = {
- name = "data-schema"
location = "EU"
- },
+ name = "data-schema"
+ }
}
}
variable "landing_pubsub" {
description = "List of landing pubsub topics and subscriptions to create"
- type = map(any)
+ type = map(map(object({
+ iam = map(list(string))
+ labels = map(string)
+ options = object({
+ ack_deadline_seconds = number
+ message_retention_duration = number
+ retain_acked_messages = bool
+ expiration_policy_ttl = number
+ })
+ })))
default = {
- landing_1 = {
- name = "landing-1"
- subscriptions = {
- sub1 = {
- labels = {},
- options = {
- ack_deadline_seconds = null
- message_retention_duration = null
- retain_acked_messages = false
- expiration_policy_ttl = null
- }
- },
- sub2 = {
- labels = {},
- options = {
- ack_deadline_seconds = null
- message_retention_duration = null
- retain_acked_messages = false
- expiration_policy_ttl = null
- }
- },
- }
- subscription_iam = {
- sub1 = {
- "roles/pubsub.subscriber" = []
- }
- sub2 = {
- "roles/pubsub.subscriber" = []
+ landing-1 = {
+ sub1 = {
+ iam = {
+ # "roles/pubsub.subscriber" = []
}
+ labels = {}
+ options = null
}
+ sub2 = {
+ iam = {}
+ labels = {},
+ options = null
+ },
}
}
}
@@ -115,7 +110,7 @@ variable "project_ids" {
}
-variable "project_service_account" {
+variable "service_account_names" {
description = "Project service accounts list."
type = object({
datamart = string
@@ -135,28 +130,36 @@ variable "project_service_account" {
variable "transformation_buckets" {
description = "List of transformation buckets to create"
- type = map(any)
+ type = map(object({
+ location = string
+ name = string
+ }))
default = {
temp = {
- name = "temp"
location = "EU"
+ name = "temp"
},
templates = {
- name = "templates"
location = "EU"
+ name = "templates"
},
}
}
variable "transformation_subnets" {
description = "List of subnets to create in the transformation Project."
- type = list(any)
+ type = list(object({
+ ip_cidr_range = string
+ name = string
+ region = string
+ secondary_ip_range = map(string)
+ }))
default = [
{
- name = "transformation-subnet",
- ip_cidr_range = "10.1.0.0/20",
- secondary_ip_range = {},
+ ip_cidr_range = "10.1.0.0/20"
+ name = "transformation-subnet"
region = "europe-west3"
+ secondary_ip_range = {}
},
]
}