Improve Minimal Data Platform Blueprint (#1473)
* Add SA to access Curated resources * Add BQ dataset in the landing project * Provide example to move data from landing to curated using BQ engine * Improve diagram
This commit is contained in:
parent
6fcb010ff2
commit
099ad03910
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
locals {
|
locals {
|
||||||
iam_lnd = {
|
iam_lnd = {
|
||||||
"roles/storage.objectCreator" = [module.land-sa-cs-0.iam_email]
|
"roles/storage.objectCreator" = [module.land-sa-0.iam_email]
|
||||||
"roles/storage.objectViewer" = [module.processing-sa-cmp-0.iam_email]
|
"roles/storage.objectViewer" = [module.processing-sa-cmp-0.iam_email]
|
||||||
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
|
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
|
||||||
}
|
}
|
||||||
|
@ -36,6 +36,9 @@ module "land-project" {
|
||||||
iam = var.project_config.billing_account_id != null ? local.iam_lnd : null
|
iam = var.project_config.billing_account_id != null ? local.iam_lnd : null
|
||||||
iam_additive = var.project_config.billing_account_id == null ? local.iam_lnd : null
|
iam_additive = var.project_config.billing_account_id == null ? local.iam_lnd : null
|
||||||
services = [
|
services = [
|
||||||
|
"bigquery.googleapis.com",
|
||||||
|
"bigqueryreservation.googleapis.com",
|
||||||
|
"bigquerystorage.googleapis.com",
|
||||||
"cloudkms.googleapis.com",
|
"cloudkms.googleapis.com",
|
||||||
"cloudresourcemanager.googleapis.com",
|
"cloudresourcemanager.googleapis.com",
|
||||||
"iam.googleapis.com",
|
"iam.googleapis.com",
|
||||||
|
@ -52,12 +55,12 @@ module "land-project" {
|
||||||
|
|
||||||
# Cloud Storage
|
# Cloud Storage
|
||||||
|
|
||||||
module "land-sa-cs-0" {
|
module "land-sa-0" {
|
||||||
source = "../../../modules/iam-service-account"
|
source = "../../../modules/iam-service-account"
|
||||||
project_id = module.land-project.project_id
|
project_id = module.land-project.project_id
|
||||||
prefix = var.prefix
|
prefix = var.prefix
|
||||||
name = "lnd-cs-0"
|
name = "lnd-sa-0"
|
||||||
display_name = "Data platform GCS landing service account."
|
display_name = "Data platform landing zone service account."
|
||||||
iam = {
|
iam = {
|
||||||
"roles/iam.serviceAccountTokenCreator" = [
|
"roles/iam.serviceAccountTokenCreator" = [
|
||||||
local.groups_iam.data-engineers
|
local.groups_iam.data-engineers
|
||||||
|
@ -75,3 +78,11 @@ module "land-cs-0" {
|
||||||
encryption_key = var.service_encryption_keys.storage
|
encryption_key = var.service_encryption_keys.storage
|
||||||
force_destroy = var.data_force_destroy
|
force_destroy = var.data_force_destroy
|
||||||
}
|
}
|
||||||
|
|
||||||
|
module "land-bq-0" {
|
||||||
|
source = "../../../modules/bigquery-dataset"
|
||||||
|
project_id = module.land-project.project_id
|
||||||
|
id = "${replace(var.prefix, "-", "_")}_lnd_bq_0"
|
||||||
|
location = var.location
|
||||||
|
encryption_key = var.service_encryption_keys.bq
|
||||||
|
}
|
||||||
|
|
|
@ -49,7 +49,7 @@ locals {
|
||||||
}
|
}
|
||||||
processing_subnet = (
|
processing_subnet = (
|
||||||
local.use_shared_vpc
|
local.use_shared_vpc
|
||||||
? var.network_config.subnet_self_links.processing_transformation
|
? var.network_config.subnet_self_link
|
||||||
: module.processing-vpc.0.subnet_self_links["${var.region}/${var.prefix}-processing"]
|
: module.processing-vpc.0.subnet_self_links["${var.region}/${var.prefix}-processing"]
|
||||||
)
|
)
|
||||||
processing_vpc = (
|
processing_vpc = (
|
||||||
|
@ -57,8 +57,6 @@ locals {
|
||||||
? var.network_config.network_self_link
|
? var.network_config.network_self_link
|
||||||
: module.processing-vpc.0.self_link
|
: module.processing-vpc.0.self_link
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
module "processing-project" {
|
module "processing-project" {
|
||||||
|
|
|
@ -18,33 +18,41 @@ locals {
|
||||||
cur_iam = {
|
cur_iam = {
|
||||||
"roles/bigquery.dataOwner" = [module.processing-sa-0.iam_email]
|
"roles/bigquery.dataOwner" = [module.processing-sa-0.iam_email]
|
||||||
"roles/bigquery.dataViewer" = [
|
"roles/bigquery.dataViewer" = [
|
||||||
|
module.cur-sa-0.iam_email,
|
||||||
local.groups_iam.data-analysts,
|
local.groups_iam.data-analysts,
|
||||||
local.groups_iam.data-engineers
|
local.groups_iam.data-engineers
|
||||||
]
|
]
|
||||||
"roles/bigquery.jobUser" = [
|
"roles/bigquery.jobUser" = [
|
||||||
module.processing-sa-0.iam_email,
|
module.processing-sa-0.iam_email, # Remove once bug is fixed. https://github.com/apache/airflow/issues/32106
|
||||||
|
module.cur-sa-0.iam_email,
|
||||||
local.groups_iam.data-analysts,
|
local.groups_iam.data-analysts,
|
||||||
local.groups_iam.data-engineers
|
local.groups_iam.data-engineers
|
||||||
]
|
]
|
||||||
"roles/datacatalog.tagTemplateViewer" = [
|
"roles/datacatalog.tagTemplateViewer" = [
|
||||||
local.groups_iam.data-analysts, local.groups_iam.data-engineers
|
module.cur-sa-0.iam_email,
|
||||||
|
local.groups_iam.data-analysts,
|
||||||
|
local.groups_iam.data-engineers
|
||||||
]
|
]
|
||||||
"roles/datacatalog.viewer" = [
|
"roles/datacatalog.viewer" = [
|
||||||
local.groups_iam.data-analysts, local.groups_iam.data-engineers
|
module.cur-sa-0.iam_email,
|
||||||
|
local.groups_iam.data-analysts,
|
||||||
|
local.groups_iam.data-engineers
|
||||||
]
|
]
|
||||||
"roles/storage.objectViewer" = [
|
"roles/storage.objectViewer" = [
|
||||||
local.groups_iam.data-analysts, local.groups_iam.data-engineers
|
module.cur-sa-0.iam_email,
|
||||||
|
local.groups_iam.data-analysts,
|
||||||
|
local.groups_iam.data-engineers
|
||||||
]
|
]
|
||||||
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
|
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
|
||||||
}
|
}
|
||||||
cur_services = [
|
cur_services = [
|
||||||
"iam.googleapis.com",
|
|
||||||
"bigquery.googleapis.com",
|
"bigquery.googleapis.com",
|
||||||
"bigqueryreservation.googleapis.com",
|
"bigqueryreservation.googleapis.com",
|
||||||
"bigquerystorage.googleapis.com",
|
"bigquerystorage.googleapis.com",
|
||||||
"cloudkms.googleapis.com",
|
"cloudkms.googleapis.com",
|
||||||
"cloudresourcemanager.googleapis.com",
|
"cloudresourcemanager.googleapis.com",
|
||||||
"compute.googleapis.com",
|
"compute.googleapis.com",
|
||||||
|
"iam.googleapis.com",
|
||||||
"servicenetworking.googleapis.com",
|
"servicenetworking.googleapis.com",
|
||||||
"serviceusage.googleapis.com",
|
"serviceusage.googleapis.com",
|
||||||
"stackdriver.googleapis.com",
|
"stackdriver.googleapis.com",
|
||||||
|
@ -75,6 +83,19 @@ module "cur-project" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
module "cur-sa-0" {
|
||||||
|
source = "../../../modules/iam-service-account"
|
||||||
|
project_id = module.cur-project.project_id
|
||||||
|
prefix = var.prefix
|
||||||
|
name = "cur-sa-0"
|
||||||
|
display_name = "Data platform curated zone service account."
|
||||||
|
iam = {
|
||||||
|
"roles/iam.serviceAccountTokenCreator" = [
|
||||||
|
local.groups_iam.data-engineers
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# Bigquery
|
# Bigquery
|
||||||
|
|
||||||
module "cur-bq-0" {
|
module "cur-bq-0" {
|
||||||
|
|
|
@ -9,7 +9,7 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
||||||
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin) <br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin) <br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|
||||||
|<b>prc-dp-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
|<b>prc-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
||||||
|
|
||||||
## Project <i>cur</i>
|
## Project <i>cur</i>
|
||||||
|
|
||||||
|
@ -18,15 +18,16 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>prc-dp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>cur-sa-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|
|<b>prc-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|
||||||
## Project <i>lnd</i>
|
## Project <i>lnd</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>lnd-cs-0</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|
|<b>lnd-sa-0</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|
||||||
|
|<b>prc-0</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|<b>prc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>prc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>prc-dp-0</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|
||||||
|
|
||||||
## Project <i>prc</i>
|
## Project <i>prc</i>
|
||||||
|
|
||||||
|
@ -35,5 +36,5 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/serviceusage.serviceUsageConsumer](https://cloud.google.com/iam/docs/understanding-roles#serviceusage.serviceUsageConsumer) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/serviceusage.serviceUsageConsumer](https://cloud.google.com/iam/docs/understanding-roles#serviceusage.serviceUsageConsumer) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/composer.ServiceAgentV2Ext](https://cloud.google.com/iam/docs/understanding-roles#composer.ServiceAgentV2Ext) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/composer.ServiceAgentV2Ext](https://cloud.google.com/iam/docs/understanding-roles#composer.ServiceAgentV2Ext) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>prc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/dataproc.editor](https://cloud.google.com/iam/docs/understanding-roles#dataproc.editor) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>prc-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/dataproc.worker](https://cloud.google.com/iam/docs/understanding-roles#dataproc.worker) |
|
||||||
|<b>prc-dp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataproc.worker](https://cloud.google.com/iam/docs/understanding-roles#dataproc.worker) |
|
|<b>prc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataproc.editor](https://cloud.google.com/iam/docs/understanding-roles#dataproc.editor) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|
|
|
@ -203,7 +203,7 @@ module "data-platform" {
|
||||||
prefix = "myprefix"
|
prefix = "myprefix"
|
||||||
}
|
}
|
||||||
|
|
||||||
# tftest modules=21 resources=116
|
# tftest modules=23 resources=123
|
||||||
```
|
```
|
||||||
|
|
||||||
## Customizations
|
## Customizations
|
||||||
|
@ -229,10 +229,7 @@ To configure the use of a shared VPC, configure the `network_config`, example:
|
||||||
network_config = {
|
network_config = {
|
||||||
host_project = "PROJECT_ID"
|
host_project = "PROJECT_ID"
|
||||||
network_self_link = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/global/networks/NAME"
|
network_self_link = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/global/networks/NAME"
|
||||||
subnet_self_links = {
|
subnet_self_link = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
|
||||||
processing_transformation = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
|
|
||||||
processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
|
|
||||||
}
|
|
||||||
composer_ip_ranges = {
|
composer_ip_ranges = {
|
||||||
cloud_sql = "192.168.XXX.XXX/24"
|
cloud_sql = "192.168.XXX.XXX/24"
|
||||||
gke_master = "192.168.XXX.XXX/28"
|
gke_master = "192.168.XXX.XXX/28"
|
||||||
|
@ -280,32 +277,31 @@ The application layer is out of scope of this script. For demo purposes only, on
|
||||||
|
|
||||||
| name | description | type | required | default |
|
| name | description | type | required | default |
|
||||||
|---|---|:---:|:---:|:---:|
|
|---|---|:---:|:---:|:---:|
|
||||||
| [organization_domain](variables.tf#L122) | Organization domain. | <code>string</code> | ✓ | |
|
| [organization_domain](variables.tf#L119) | Organization domain. | <code>string</code> | ✓ | |
|
||||||
| [prefix](variables.tf#L127) | Prefix used for resource names. | <code>string</code> | ✓ | |
|
| [prefix](variables.tf#L124) | Prefix used for resource names. | <code>string</code> | ✓ | |
|
||||||
| [project_config](variables.tf#L136) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | <code title="object({ billing_account_id = optional(string, null) parent = string project_ids = optional(object({ landing = string processing = string curated = string common = string }), { landing = "lnd" processing = "prc" curated = "cur" common = "cmn" } ) })">object({…})</code> | ✓ | |
|
| [project_config](variables.tf#L133) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | <code title="object({ billing_account_id = optional(string, null) parent = string project_ids = optional(object({ landing = string processing = string curated = string common = string }), { landing = "lnd" processing = "prc" curated = "cur" common = "cmn" } ) })">object({…})</code> | ✓ | |
|
||||||
| [composer_config](variables.tf#L17) | Cloud Composer config. | <code title="object({ environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL") software_config = optional(object({ airflow_config_overrides = optional(map(string), {}) pypi_packages = optional(map(string), {}) env_variables = optional(map(string), {}) image_version = optional(string, "composer-2-airflow-2") }), {}) workloads_config = optional(object({ scheduler = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) count = optional(number, 1) } ), {}) web_server = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) }), {}) worker = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) min_count = optional(number, 1) max_count = optional(number, 3) } ), {}) }), {}) })">object({…})</code> | | <code>{}</code> |
|
| [composer_config](variables.tf#L17) | Cloud Composer config. | <code title="object({ environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL") software_config = optional(object({ airflow_config_overrides = optional(map(string), {}) pypi_packages = optional(map(string), {}) env_variables = optional(map(string), {}) image_version = optional(string, "composer-2-airflow-2") }), {}) workloads_config = optional(object({ scheduler = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) count = optional(number, 1) } ), {}) web_server = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) }), {}) worker = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) min_count = optional(number, 1) max_count = optional(number, 3) } ), {}) }), {}) })">object({…})</code> | | <code>{}</code> |
|
||||||
| [data_catalog_tags](variables.tf#L54) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> |
|
| [data_catalog_tags](variables.tf#L54) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> |
|
||||||
| [data_force_destroy](variables.tf#L65) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
|
| [data_force_destroy](variables.tf#L65) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
|
||||||
| [enable_services](variables.tf#L71) | Flag to enable or disable services in the Data Platform. | <code title="object({ composer = optional(bool, true) dataproc_history_server = optional(bool, true) })">object({…})</code> | | <code>{}</code> |
|
| [enable_services](variables.tf#L71) | Flag to enable or disable services in the Data Platform. | <code title="object({ composer = optional(bool, true) dataproc_history_server = optional(bool, true) })">object({…})</code> | | <code>{}</code> |
|
||||||
| [groups](variables.tf#L80) | User groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> |
|
| [groups](variables.tf#L80) | User groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> |
|
||||||
| [location](variables.tf#L90) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> |
|
| [location](variables.tf#L90) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> |
|
||||||
| [network_config](variables.tf#L96) | Shared VPC network configurations to use. If null networks will be created in projects. | <code title="object({ host_project = optional(string) network_self_link = optional(string) subnet_self_links = optional(object({ processing_transformation = string processing_composer = string }), null) composer_ip_ranges = optional(object({ connection_subnetwork = optional(string) cloud_sql = optional(string, "10.20.10.0/24") gke_master = optional(string, "10.20.11.0/28") pods_range_name = optional(string, "pods") services_range_name = optional(string, "services") }), {}) })">object({…})</code> | | <code>{}</code> |
|
| [network_config](variables.tf#L96) | Shared VPC network configurations to use. If null networks will be created in projects. | <code title="object({ host_project = optional(string) network_self_link = optional(string) subnet_self_link = optional(string) composer_ip_ranges = optional(object({ connection_subnetwork = optional(string) cloud_sql = optional(string, "10.20.10.0/24") gke_master = optional(string, "10.20.11.0/28") pods_range_name = optional(string, "pods") services_range_name = optional(string, "services") }), {}) })">object({…})</code> | | <code>{}</code> |
|
||||||
| [project_suffix](variables.tf#L160) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
|
| [project_suffix](variables.tf#L157) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
|
||||||
| [region](variables.tf#L166) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> |
|
| [region](variables.tf#L163) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> |
|
||||||
| [service_encryption_keys](variables.tf#L172) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = optional(string) composer = optional(string) compute = optional(string) storage = optional(string) })">object({…})</code> | | <code>{}</code> |
|
| [service_encryption_keys](variables.tf#L169) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = optional(string) composer = optional(string) compute = optional(string) storage = optional(string) })">object({…})</code> | | <code>{}</code> |
|
||||||
|
|
||||||
## Outputs
|
## Outputs
|
||||||
|
|
||||||
| name | description | sensitive |
|
| name | description | sensitive |
|
||||||
|---|---|:---:|
|
|---|---|:---:|
|
||||||
| [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | |
|
| [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | |
|
||||||
| [composer](outputs.tf#L24) | Composer variables. | |
|
| [composer](outputs.tf#L25) | Composer variables. | |
|
||||||
| [dataproc-history-server](outputs.tf#L31) | List of bucket names which have been assigned to the cluster. | |
|
| [dataproc-history-server](outputs.tf#L33) | List of bucket names which have been assigned to the cluster. | |
|
||||||
| [gcs_buckets](outputs.tf#L36) | GCS buckets. | |
|
| [gcs_buckets](outputs.tf#L38) | GCS buckets. | |
|
||||||
| [kms_keys](outputs.tf#L46) | Cloud KMS keys. | |
|
| [kms_keys](outputs.tf#L47) | Cloud KMS keys. | |
|
||||||
| [projects](outputs.tf#L51) | GCP projects information. | |
|
| [network](outputs.tf#L52) | VPC network. | |
|
||||||
| [service_accounts](outputs.tf#L69) | Service account created. | |
|
| [projects](outputs.tf#L60) | GCP projects information. | |
|
||||||
| [vpc_network](outputs.tf#L78) | VPC network. | |
|
| [service_accounts](outputs.tf#L78) | Service account created. | |
|
||||||
| [vpc_subnet](outputs.tf#L86) | VPC subnetworks. | |
|
|
||||||
|
|
||||||
<!-- END TFDOC -->
|
<!-- END TFDOC -->
|
||||||
|
|
|
@ -47,7 +47,7 @@ Below you can find computed commands to perform steps.
|
||||||
```bash
|
```bash
|
||||||
terraform output -json | jq -r '@sh "export LND_SA=\(.service_accounts.value.landing)\nexport PRC_SA=\(.service_accounts.value.processing)\nexport CMP_SA=\(.service_accounts.value.composer)"' > env.sh
|
terraform output -json | jq -r '@sh "export LND_SA=\(.service_accounts.value.landing)\nexport PRC_SA=\(.service_accounts.value.processing)\nexport CMP_SA=\(.service_accounts.value.composer)"' > env.sh
|
||||||
|
|
||||||
terraform output -json | jq -r '@sh "export LND_GCS=\(.gcs_buckets.value.landing_cs_0)\nexport PRC_GCS=\(.gcs_buckets.value.processing_cs_0)\nexport CMP_GCS=\(.gcs_buckets.value.composer)"' >> env.sh
|
terraform output -json | jq -r '@sh "export LND_GCS=\(.gcs_buckets.value.landing)\nexport PRC_GCS=\(.gcs_buckets.value.processing)\nexport CUR_GCS=\(.gcs_buckets.value.curated)\nexport CMP_GCS=\(.composer.value.dag_bucket)"' >> env.sh
|
||||||
|
|
||||||
source ./env.sh
|
source ./env.sh
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
# Copyright 2022 Google LLC
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Load The Dependencies
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import datetime
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
from airflow import models
|
||||||
|
from airflow.operators import dummy
|
||||||
|
from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Set variables - Needed for the DEMO
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
BQ_LOCATION = os.environ.get("BQ_LOCATION")
|
||||||
|
CURATED_PRJ = os.environ.get("CURATED_PRJ")
|
||||||
|
CURATED_BQ_DATASET = os.environ.get("CURATED_BQ_DATASET")
|
||||||
|
CURATED_GCS = os.environ.get("CURATED_GCS")
|
||||||
|
LAND_PRJ = os.environ.get("LAND_PRJ")
|
||||||
|
LAND_GCS = os.environ.get("LAND_GCS")
|
||||||
|
PROCESSING_GCS = os.environ.get("PROCESSING_GCS")
|
||||||
|
PROCESSING_SA = os.environ.get("PROCESSING_SA")
|
||||||
|
PROCESSING_PRJ = os.environ.get("PROCESSING_PRJ")
|
||||||
|
PROCESSING_SUBNET = os.environ.get("PROCESSING_SUBNET")
|
||||||
|
PROCESSING_VPC = os.environ.get("PROCESSING_VPC")
|
||||||
|
DP_KMS_KEY = os.environ.get("DP_KMS_KEY", "")
|
||||||
|
DP_REGION = os.environ.get("DP_REGION")
|
||||||
|
DP_ZONE = os.environ.get("DP_REGION") + "-b"
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Set default arguments
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# If you are running Airflow in more than one time zone
|
||||||
|
# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
|
||||||
|
# for best practices
|
||||||
|
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
|
||||||
|
|
||||||
|
default_args = {
|
||||||
|
'owner': 'airflow',
|
||||||
|
'start_date': yesterday,
|
||||||
|
'depends_on_past': False,
|
||||||
|
'email': [''],
|
||||||
|
'email_on_failure': False,
|
||||||
|
'email_on_retry': False,
|
||||||
|
'retries': 1,
|
||||||
|
'retry_delay': datetime.timedelta(minutes=5),
|
||||||
|
}
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Main DAG
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
with models.DAG(
|
||||||
|
'bq_gcs2bq',
|
||||||
|
default_args=default_args,
|
||||||
|
schedule_interval=None) as dag:
|
||||||
|
start = dummy.DummyOperator(
|
||||||
|
task_id='start',
|
||||||
|
trigger_rule='all_success'
|
||||||
|
)
|
||||||
|
|
||||||
|
end = dummy.DummyOperator(
|
||||||
|
task_id='end',
|
||||||
|
trigger_rule='all_success'
|
||||||
|
)
|
||||||
|
|
||||||
|
# BigQuery tables are created automatically for demo purposes.
|
||||||
|
# Consider a dedicated pipeline or tool for a real life scenario.
|
||||||
|
|
||||||
|
customers_import = GCSToBigQueryOperator(
|
||||||
|
task_id='csv_to_bigquery',
|
||||||
|
bucket=LAND_GCS[5:],
|
||||||
|
source_objects=['customers.csv'],
|
||||||
|
destination_project_dataset_table='{}:{}.{}'.format(CURATED_PRJ, CURATED_BQ_DATASET, 'customers'),
|
||||||
|
create_disposition='CREATE_IF_NEEDED',
|
||||||
|
write_disposition='WRITE_APPEND',
|
||||||
|
schema_update_options=['ALLOW_FIELD_RELAXATION', 'ALLOW_FIELD_ADDITION'],
|
||||||
|
schema_object="customers.json",
|
||||||
|
schema_object_bucket=PROCESSING_GCS[5:],
|
||||||
|
project_id=PROCESSING_PRJ, # The process will continue to run on the dataset project until the Apache Airflow bug is fixed. https://github.com/apache/airflow/issues/32106
|
||||||
|
impersonation_chain=[PROCESSING_SA]
|
||||||
|
)
|
||||||
|
|
||||||
|
start >> customers_import >> end
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 102 KiB After Width: | Height: | Size: 174 KiB |
|
@ -17,7 +17,8 @@
|
||||||
output "bigquery-datasets" {
|
output "bigquery-datasets" {
|
||||||
description = "BigQuery datasets."
|
description = "BigQuery datasets."
|
||||||
value = {
|
value = {
|
||||||
curated = module.cur-bq-0.dataset_id,
|
curated = module.cur-bq-0.dataset_id
|
||||||
|
landing = module.land-bq-0.dataset_id
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,6 +26,7 @@ output "composer" {
|
||||||
description = "Composer variables."
|
description = "Composer variables."
|
||||||
value = {
|
value = {
|
||||||
air_flow_uri = try(google_composer_environment.processing-cmp-0[0].config.0.airflow_uri, null)
|
air_flow_uri = try(google_composer_environment.processing-cmp-0[0].config.0.airflow_uri, null)
|
||||||
|
dag_bucket = try(google_composer_environment.processing-cmp-0[0].config[0].dag_gcs_prefix, null)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,10 +38,9 @@ output "dataproc-history-server" {
|
||||||
output "gcs_buckets" {
|
output "gcs_buckets" {
|
||||||
description = "GCS buckets."
|
description = "GCS buckets."
|
||||||
value = {
|
value = {
|
||||||
landing_cs_0 = module.land-cs-0.name,
|
curated = module.cur-cs-0.name
|
||||||
processing_cs_0 = module.processing-cs-0.name,
|
landing = module.land-cs-0.name
|
||||||
cur_cs_0 = module.cur-cs-0.name,
|
processing = module.processing-cs-0.name
|
||||||
composer = try(google_composer_environment.processing-cmp-0[0].config[0].dag_gcs_prefix, null)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,20 +49,28 @@ output "kms_keys" {
|
||||||
value = var.service_encryption_keys
|
value = var.service_encryption_keys
|
||||||
}
|
}
|
||||||
|
|
||||||
|
output "network" {
|
||||||
|
description = "VPC network."
|
||||||
|
value = {
|
||||||
|
processing_subnet = local.processing_subnet
|
||||||
|
processing_vpc = local.processing_vpc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
output "projects" {
|
output "projects" {
|
||||||
description = "GCP Projects informations."
|
description = "GCP Projects informations."
|
||||||
value = {
|
value = {
|
||||||
project_number = {
|
project_number = {
|
||||||
landing = module.land-project.number,
|
common = module.common-project.number
|
||||||
common = module.common-project.number,
|
curated = module.cur-project.number
|
||||||
curated = module.cur-project.number,
|
landing = module.land-project.number
|
||||||
processing = module.processing-project.number,
|
processing = module.processing-project.number
|
||||||
}
|
}
|
||||||
project_id = {
|
project_id = {
|
||||||
landing = module.land-project.project_id,
|
common = module.common-project.project_id
|
||||||
common = module.common-project.project_id,
|
curated = module.cur-project.project_id
|
||||||
curated = module.cur-project.project_id,
|
landing = module.land-project.project_id
|
||||||
processing = module.processing-project.project_id,
|
processing = module.processing-project.project_id
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -69,24 +78,9 @@ output "projects" {
|
||||||
output "service_accounts" {
|
output "service_accounts" {
|
||||||
description = "Service account created."
|
description = "Service account created."
|
||||||
value = {
|
value = {
|
||||||
landing = module.land-sa-cs-0.email
|
|
||||||
processing = module.processing-sa-0.email
|
|
||||||
composer = module.processing-sa-cmp-0.email
|
composer = module.processing-sa-cmp-0.email
|
||||||
}
|
curated = module.cur-sa-0.email,
|
||||||
}
|
landing = module.land-sa-0.email,
|
||||||
|
processing = module.processing-sa-0.email,
|
||||||
output "vpc_network" {
|
|
||||||
description = "VPC network."
|
|
||||||
value = {
|
|
||||||
processing_transformation = local.processing_vpc
|
|
||||||
processing_composer = local.processing_vpc
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
output "vpc_subnet" {
|
|
||||||
description = "VPC subnetworks."
|
|
||||||
value = {
|
|
||||||
processing_transformation = local.processing_subnet
|
|
||||||
processing_composer = local.processing_subnet
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -98,10 +98,7 @@ variable "network_config" {
|
||||||
type = object({
|
type = object({
|
||||||
host_project = optional(string)
|
host_project = optional(string)
|
||||||
network_self_link = optional(string)
|
network_self_link = optional(string)
|
||||||
subnet_self_links = optional(object({
|
subnet_self_link = optional(string)
|
||||||
processing_transformation = string
|
|
||||||
processing_composer = string
|
|
||||||
}), null)
|
|
||||||
composer_ip_ranges = optional(object({
|
composer_ip_ranges = optional(object({
|
||||||
connection_subnetwork = optional(string)
|
connection_subnetwork = optional(string)
|
||||||
cloud_sql = optional(string, "10.20.10.0/24")
|
cloud_sql = optional(string, "10.20.10.0/24")
|
||||||
|
|
Loading…
Reference in New Issue