Merge pull request #607 from GoogleCloudPlatform/lcaggio/dp-dc-policy-tag

[Data Platform] Add Data Catalog Policy tag
This commit is contained in:
lcaggio 2022-04-04 15:48:53 +02:00 committed by GitHub
commit 56b89211a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 719 additions and 99 deletions

View File

@ -67,6 +67,7 @@ resource "google_composer_environment" "orch-cmp-0" {
env_variables = merge(
var.composer_config.env_variables, {
BQ_LOCATION = var.location
DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}")
DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "")
DTL_L0_PRJ = module.lake-0-project.project_id
DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id

View File

@ -23,6 +23,7 @@ locals {
(local.groups.data-analysts) = [
"roles/bigquery.dataViewer",
"roles/bigquery.jobUser",
"roles/bigquery.metadataViewer",
"roles/bigquery.user",
"roles/datacatalog.viewer",
"roles/datacatalog.tagTemplateViewer",
@ -37,6 +38,7 @@ locals {
(local.groups.data-analysts) = [
"roles/bigquery.dataEditor",
"roles/bigquery.jobUser",
"roles/bigquery.metadataViewer",
"roles/bigquery.user",
"roles/datacatalog.viewer",
"roles/datacatalog.tagTemplateViewer",
@ -44,7 +46,7 @@ locals {
]
}
lake_0_iam = {
"roles/bigquery.dataEditor" = [
"roles/bigquery.dataOwner" = [
module.load-sa-df-0.iam_email,
module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email,
@ -52,18 +54,24 @@ locals {
"roles/bigquery.jobUser" = [
module.load-sa-df-0.iam_email,
]
"roles/datacatalog.categoryAdmin" = [
module.transf-sa-bq-0.iam_email
]
"roles/storage.objectCreator" = [
module.load-sa-df-0.iam_email,
]
}
lake_iam = {
"roles/bigquery.dataEditor" = [
"roles/bigquery.dataOwner" = [
module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email,
]
"roles/bigquery.jobUser" = [
module.transf-sa-bq-0.iam_email,
]
"roles/datacatalog.categoryAdmin" = [
module.load-sa-df-0.iam_email
]
"roles/storage.objectCreator" = [
module.transf-sa-df-0.iam_email,
]

View File

@ -21,6 +21,9 @@ module "common-project" {
prefix = var.prefix
name = "cmn${local.project_suffix}"
group_iam = {
(local.groups.data-analysts) = [
"roles/datacatalog.viewer",
]
(local.groups.data-engineers) = [
"roles/dlp.reader",
"roles/dlp.user",
@ -28,6 +31,7 @@ module "common-project" {
]
(local.groups.data-security) = [
"roles/dlp.admin",
"roles/datacatalog.admin"
]
}
iam = {
@ -35,6 +39,17 @@ module "common-project" {
module.load-sa-df-0.iam_email,
module.transf-sa-df-0.iam_email
]
"roles/datacatalog.viewer" = [
module.load-sa-df-0.iam_email,
module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email
]
"roles/datacatalog.categoryFineGrainedReader" = [
module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email,
# Uncomment if you want to grant access to `data-analyst` to all columns tagged.
# local.groups_iam.data-analysts
]
}
services = concat(var.project_services, [
"datacatalog.googleapis.com",
@ -42,6 +57,16 @@ module "common-project" {
])
}
# Data Catalog Policy tag
module "common-datacatalog" {
source = "../../../modules/data-catalog-policy-tag"
project_id = module.common-project.project_id
name = "${var.prefix}-datacatalog-policy-tags"
location = var.location
tags = var.data_catalog_tags
}
# To create KMS keys in the common projet: uncomment this section and assigne key links accondingly in local.service_encryption_keys variable
# module "cmn-kms-0" {

View File

@ -6,45 +6,53 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin) <br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
## Project <i>dtl-0</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
## Project <i>dtl-1</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dtl-2</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dtl-plg</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
## Project <i>lnd</i>
@ -62,6 +70,8 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles |
|---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
@ -69,7 +79,9 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles |
|---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
@ -79,6 +91,8 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles |
|---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |

View File

@ -154,6 +154,19 @@ Cloud Data Loss Prevention resources and templates should be stored in the secur
You can find more details and best practices on using DLP to De-identification and re-identification of PII in large-scale datasets in the [GCP documentation](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp).
## Data Catalog
[Data Catalog](https://cloud.google.com/data-catalog) helps you to document your data entry at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag template](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest to use `Tags` and `Tag templates`.
The default configuration will implement 3 tags:
- `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers.
- `2_Private`: policy tag for columns that include sensitive personal identifiable information (PII) information, such as a person's first name.
- `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit.
Anything that is not tagged is available to all users who have access to the data warehouse.
For the porpuse of the example no groups has access to tagged data. You can configure your tags and roles associated by configuring the `data_catalog_tags` variable. We suggest useing the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide to designing your tags structure and access pattern.
## How to run this script
To deploy this example on your GCP organization, you will need
@ -207,17 +220,10 @@ To do this, you need to remove IAM binging at project-level for the `data-analys
## Demo pipeline
The application layer is out of scope of this script, but as a demo, it is provided with a Cloud Composer DAG to mode data from the `landing` area to the `DataLake L2` dataset.
The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features.
Just follow the commands you find in the `demo_commands` Terraform output, go in the Cloud Composer UI and run the `data_pipeline_dag`.
You can find examples in the `[demo](./demo)` folder.
Description of commands:
- 01: copy sample data to a `landing` Cloud Storage bucket impersonating the `load` service account.
- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account.
- 04: Open the Cloud Composer Airflow UI and run the imported DAG.
- 05: Run the BigQuery query to see results.
<!-- BEGIN TFDOC -->
## Variables
@ -225,17 +231,18 @@ Description of commands:
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| [billing_account_id](variables.tf#L17) | Billing account id. | <code>string</code> | ✓ | |
| [folder_id](variables.tf#L42) | Folder to be used for the networking resources in folders/nnnn format. | <code>string</code> | ✓ | |
| [organization_domain](variables.tf#L87) | Organization domain. | <code>string</code> | ✓ | |
| [prefix](variables.tf#L92) | Unique prefix used for resource names. | <code>string</code> | ✓ | |
| [folder_id](variables.tf#L53) | Folder to be used for the networking resources in folders/nnnn format. | <code>string</code> | ✓ | |
| [organization_domain](variables.tf#L98) | Organization domain. | <code>string</code> | ✓ | |
| [prefix](variables.tf#L103) | Unique prefix used for resource names. | <code>string</code> | ✓ | |
| [composer_config](variables.tf#L22) | Cloud Composer config. | <code title="object&#40;&#123;&#10; node_count &#61; number&#10; airflow_version &#61; string&#10; env_variables &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; node_count &#61; 3&#10; airflow_version &#61; &#34;composer-1.17.5-airflow-2.1.4&#34;&#10; env_variables &#61; &#123;&#125;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [data_force_destroy](variables.tf#L36) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
| [groups](variables.tf#L53) | User groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [location](variables.tf#L47) | Location used for multi-regional resources. | <code>string</code> | | <code>&#34;eu&#34;</code> |
| [network_config](variables.tf#L63) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | <code title="object&#40;&#123;&#10; host_project &#61; string&#10; network_self_link &#61; string&#10; subnet_self_links &#61; object&#40;&#123;&#10; load &#61; string&#10; transformation &#61; string&#10; orchestration &#61; string&#10; &#125;&#41;&#10; composer_ip_ranges &#61; object&#40;&#123;&#10; cloudsql &#61; string&#10; gke_master &#61; string&#10; web_server &#61; string&#10; &#125;&#41;&#10; composer_secondary_ranges &#61; object&#40;&#123;&#10; pods &#61; string&#10; services &#61; string&#10; &#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> |
| [project_services](variables.tf#L97) | List of core services enabled on all projects. | <code>list&#40;string&#41;</code> | | <code title="&#91;&#10; &#34;cloudresourcemanager.googleapis.com&#34;,&#10; &#34;iam.googleapis.com&#34;,&#10; &#34;serviceusage.googleapis.com&#34;,&#10; &#34;stackdriver.googleapis.com&#34;&#10;&#93;">&#91;&#8230;&#93;</code> |
| [project_suffix](variables.tf#L108) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
| [region](variables.tf#L114) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
| [data_catalog_tags](variables.tf#L36) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map&#40;map&#40;list&#40;string&#41;&#41;&#41;</code> | | <code title="&#123;&#10; &#34;3_Confidential&#34; &#61; null&#10; &#34;2_Private&#34; &#61; null&#10; &#34;1_Sensitive&#34; &#61; null&#10;&#125;">&#123;&#8230;&#125;</code> |
| [data_force_destroy](variables.tf#L47) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
| [groups](variables.tf#L64) | User groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [location](variables.tf#L58) | Location used for multi-regional resources. | <code>string</code> | | <code>&#34;eu&#34;</code> |
| [network_config](variables.tf#L74) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | <code title="object&#40;&#123;&#10; host_project &#61; string&#10; network_self_link &#61; string&#10; subnet_self_links &#61; object&#40;&#123;&#10; load &#61; string&#10; transformation &#61; string&#10; orchestration &#61; string&#10; &#125;&#41;&#10; composer_ip_ranges &#61; object&#40;&#123;&#10; cloudsql &#61; string&#10; gke_master &#61; string&#10; web_server &#61; string&#10; &#125;&#41;&#10; composer_secondary_ranges &#61; object&#40;&#123;&#10; pods &#61; string&#10; services &#61; string&#10; &#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> |
| [project_services](variables.tf#L108) | List of core services enabled on all projects. | <code>list&#40;string&#41;</code> | | <code title="&#91;&#10; &#34;cloudresourcemanager.googleapis.com&#34;,&#10; &#34;iam.googleapis.com&#34;,&#10; &#34;serviceusage.googleapis.com&#34;,&#10; &#34;stackdriver.googleapis.com&#34;&#10;&#93;">&#91;&#8230;&#93;</code> |
| [project_suffix](variables.tf#L119) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
| [region](variables.tf#L125) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
## Outputs
@ -254,13 +261,6 @@ Description of commands:
Features to add in future releases:
- Add support for Column level access on BigQuery
- Add example templates for Data Catalog
- Add example on how to use Cloud Data Loss Prevention
- Add solution to handle Tables, Views, and Authorized Views lifecycle
- Add solution to handle Metadata lifecycle
## To Test/Fix
- Composer require "Require OS Login" not enforced
- External Shared-VPC

View File

@ -1,3 +1,32 @@
# Data ingestion Demo
In this folder you can find an example to ingest data on the `data platfoem` instantiated in [here](../). See details in the [README.m](../#demo-pipeline) to run the demo.
In this folder, you can find an example to ingest data on the `data platform` instantiated [here](../).
The example is not intended to be a production-ready code.
## Demo use case
The demo imports purchase data generated by a store.
## Input files
Data are uploaded to the `landing` GCS bucket. File structure:
- `customers.csv`: Comma separate value with customer information in the following format: Customer ID, Name, Surname, Registration Timestamp
- `purchases.csv`: Comma separate value with customer information in the following format: Item ID, Customer ID, Item, Item price, Purchase Timestamp
## Data processing pipelines
Different data pipelines are provided to highlight different features and patterns. For the purpose of the example, a single pipeline handle all data lifecycles. When adapting them to your real use case, you may want to evaluate the option to handle each functional step on a separate pipeline or a dedicated tool. For example, you may want to use `Dataform` to handle data schemas lifecycle.
Below you can find a description of each example:
- Simple import data: [`datapipeline.py`](./datapipeline.py) is a simple pipeline to import provided data from the `landing` Google Cloud Storage bucket to the Data Hub L2 layer joining `customers` and `purchases` tables into `customerpurchase` table.
- Import data with Policy Tags: [`datapipeline_dc_tags.py`](./datapipeline.py) imports provided data from `landing` bucket to the Data Hub L2 layer protecting sensitive data using Data Catalog policy Tags.
- Delete tables: [`delete_table.py`](./delete_table.py) deletes BigQuery tables created by import pipelines.
## Runnin the demo
To run demo examples, please follow the following steps:
- 01: copy sample data to the `landing` Cloud Storage bucket impersonating the `load` service account.
- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account.
- 04: Open the Cloud Composer Airflow UI and run the imported DAG.
- 05: Run the BigQuery query to see results.
You can find pre-computed commands in the `demo_commands` output variable of the deployed terraform [data pipeline](../).

View File

@ -19,18 +19,21 @@
import csv
import datetime
import io
import json
import logging
import os
from airflow import models
from airflow.contrib.operators.dataflow_operator import DataflowTemplateOperator
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
from airflow.operators import dummy
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator
from airflow.utils.task_group import TaskGroup
# --------------------------------------------------------------------------------
# Set variables
# ------------------------------------------------------------
# Set variables - Needed for the DEMO
# --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION")
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
@ -84,7 +87,6 @@ default_args = {
'retries': 1,
'retry_delay': datetime.timedelta(minutes=5),
'dataflow_default_options': {
'project': LOD_PRJ,
'location': DF_REGION,
'zone': DF_ZONE,
'stagingLocation': LOD_GCS_STAGING,
@ -114,9 +116,13 @@ with models.DAG(
trigger_rule='all_success'
)
customers_import = DataflowTemplateOperator(
task_id="dataflow_customer_import",
# Bigquery Tables automatically created for demo porpuse.
# Consider a dedicated pipeline or tool for a real life scenario.
customers_import = DataflowTemplatedJobStartOperator(
task_id="dataflow_customers_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
project_id=LOD_PRJ,
location=DF_REGION,
parameters={
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/customers_schema.json",
@ -127,9 +133,11 @@ with models.DAG(
},
)
purchases_import = DataflowTemplateOperator(
purchases_import = DataflowTemplatedJobStartOperator(
task_id="dataflow_purchases_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
project_id=LOD_PRJ,
location=DF_REGION,
parameters={
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/purchases_schema.json",
@ -180,13 +188,13 @@ with models.DAG(
'jobType':'QUERY',
'query':{
'query':"""SELECT
customer_id,
purchase_id,
name,
surname,
item,
price,
timestamp
customer_id,
purchase_id,
name,
surname,
item,
price,
timestamp
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
'destinationTable':{
@ -201,4 +209,4 @@ with models.DAG(
impersonation_chain=[TRF_SA_BQ]
)
start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end
start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end

View File

@ -0,0 +1,322 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------------
# Load The Dependencies
# --------------------------------------------------------------------------------
import csv
import datetime
import io
import json
import logging
import os
from airflow import models
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
from airflow.operators import dummy
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator
from airflow.utils.task_group import TaskGroup
# --------------------------------------------------------------------------------
# Set variables - Needed for the DEMO
# --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION")
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
GCP_REGION = os.environ.get("GCP_REGION")
LND_PRJ = os.environ.get("LND_PRJ")
LND_BQ = os.environ.get("LND_BQ")
LND_GCS = os.environ.get("LND_GCS")
LND_PS = os.environ.get("LND_PS")
LOD_PRJ = os.environ.get("LOD_PRJ")
LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET")
LOD_SA_DF = os.environ.get("LOD_SA_DF")
ORC_PRJ = os.environ.get("ORC_PRJ")
ORC_GCS = os.environ.get("ORC_GCS")
TRF_PRJ = os.environ.get("TRF_PRJ")
TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING")
TRF_NET_VPC = os.environ.get("TRF_NET_VPC")
TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET")
TRF_SA_DF = os.environ.get("TRF_SA_DF")
TRF_SA_BQ = os.environ.get("TRF_SA_BQ")
DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "")
DF_REGION = os.environ.get("GCP_REGION")
DF_ZONE = os.environ.get("GCP_REGION") + "-b"
# --------------------------------------------------------------------------------
# Set default arguments
# --------------------------------------------------------------------------------
# If you are running Airflow in more than one time zone
# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
# for best practices
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
default_args = {
'owner': 'airflow',
'start_date': yesterday,
'depends_on_past': False,
'email': [''],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': datetime.timedelta(minutes=5),
'dataflow_default_options': {
'location': DF_REGION,
'zone': DF_ZONE,
'stagingLocation': LOD_GCS_STAGING,
'tempLocation': LOD_GCS_STAGING + "/tmp",
'serviceAccountEmail': LOD_SA_DF,
'subnetwork': LOD_NET_SUBNET,
'ipConfiguration': "WORKER_IP_PRIVATE",
'kmsKeyName' : DF_KMS_KEY
},
}
# --------------------------------------------------------------------------------
# Main DAG
# --------------------------------------------------------------------------------
with models.DAG(
'data_pipeline_dc_tags_dag',
default_args=default_args,
schedule_interval=None) as dag:
start = dummy.DummyOperator(
task_id='start',
trigger_rule='all_success'
)
end = dummy.DummyOperator(
task_id='end',
trigger_rule='all_success'
)
# Bigquery Tables created here for demo porpuse.
# Consider a dedicated pipeline or tool for a real life scenario.
with TaskGroup('upsert_table') as upsert_table:
upsert_table_customers = BigQueryUpsertTableOperator(
task_id="upsert_table_customers",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
impersonation_chain=[TRF_SA_DF],
table_resource={
"tableReference": {"tableId": "customers"},
},
)
upsert_table_purchases = BigQueryUpsertTableOperator(
task_id="upsert_table_purchases",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
impersonation_chain=[TRF_SA_BQ],
table_resource={
"tableReference": {"tableId": "purchases"}
},
)
upsert_table_customer_purchase_l1 = BigQueryUpsertTableOperator(
task_id="upsert_table_customer_purchase_l1",
project_id=DTL_L1_PRJ,
dataset_id=DTL_L1_BQ_DATASET,
impersonation_chain=[TRF_SA_BQ],
table_resource={
"tableReference": {"tableId": "customer_purchase"}
},
)
upsert_table_customer_purchase_l2 = BigQueryUpsertTableOperator(
task_id="upsert_table_customer_purchase_l2",
project_id=DTL_L2_PRJ,
dataset_id=DTL_L2_BQ_DATASET,
impersonation_chain=[TRF_SA_BQ],
table_resource={
"tableReference": {"tableId": "customer_purchase"}
},
)
# Bigquery Tables schema defined here for demo porpuse.
# Consider a dedicated pipeline or tool for a real life scenario.
with TaskGroup('update_schema_table') as update_schema_table:
update_table_schema_customers = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customers",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
table_id="customers",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
schema_fields_updates=[
{ "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
{ "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
]
)
update_table_schema_customers = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_purchases",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
table_id="purchases",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
schema_fields_updates=[
{ "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
{ "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
]
)
update_table_schema_customer_purchase_l1 = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customer_purchase_l1",
project_id=DTL_L1_PRJ,
dataset_id=DTL_L1_BQ_DATASET,
table_id="customer_purchase",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
schema_fields_updates=[
{ "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
{ "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
{ "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
{ "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
]
)
update_table_schema_customer_purchase_l2 = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customer_purchase_l2",
project_id=DTL_L2_PRJ,
dataset_id=DTL_L2_BQ_DATASET,
table_id="customer_purchase",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
schema_fields_updates=[
{ "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
{ "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
{ "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
{ "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
]
)
customers_import = DataflowTemplatedJobStartOperator(
task_id="dataflow_customers_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
project_id=LOD_PRJ,
location=DF_REGION,
parameters={
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/customers_schema.json",
"javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js",
"inputFilePattern": LND_GCS + "/customers.csv",
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".customers",
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
},
)
purchases_import = DataflowTemplatedJobStartOperator(
task_id="dataflow_purchases_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
project_id=LOD_PRJ,
location=DF_REGION,
parameters={
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/purchases_schema.json",
"javascriptTextTransformGcsPath": ORC_GCS + "/purchases_udf.js",
"inputFilePattern": LND_GCS + "/purchases.csv",
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".purchases",
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
},
)
join_customer_purchase = BigQueryInsertJobOperator(
task_id='bq_join_customer_purchase',
gcp_conn_id='bigquery_default',
project_id=TRF_PRJ,
location=BQ_LOCATION,
configuration={
'jobType':'QUERY',
'query':{
'query':"""SELECT
c.id as customer_id,
p.id as purchase_id,
c.name as name,
c.surname as surname,
p.item as item,
p.price as price,
p.timestamp as timestamp
FROM `{dtl_0_prj}.{dtl_0_dataset}.customers` c
JOIN `{dtl_0_prj}.{dtl_0_dataset}.purchases` p ON c.id = p.customer_id
""".format(dtl_0_prj=DTL_L0_PRJ, dtl_0_dataset=DTL_L0_BQ_DATASET, ),
'destinationTable':{
'projectId': DTL_L1_PRJ,
'datasetId': DTL_L1_BQ_DATASET,
'tableId': 'customer_purchase'
},
'writeDisposition':'WRITE_TRUNCATE',
"useLegacySql": False
}
},
impersonation_chain=[TRF_SA_BQ]
)
l2_customer_purchase = BigQueryInsertJobOperator(
task_id='bq_l2_customer_purchase',
gcp_conn_id='bigquery_default',
project_id=TRF_PRJ,
location=BQ_LOCATION,
configuration={
'jobType':'QUERY',
'query':{
'query':"""SELECT
customer_id,
purchase_id,
name,
surname,
item,
price,
timestamp
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
'destinationTable':{
'projectId': DTL_L2_PRJ,
'datasetId': DTL_L2_BQ_DATASET,
'tableId': 'customer_purchase'
},
'writeDisposition':'WRITE_TRUNCATE',
"useLegacySql": False
}
},
impersonation_chain=[TRF_SA_BQ]
)
start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end

View File

@ -0,0 +1,146 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------------
# Load The Dependencies
# --------------------------------------------------------------------------------
import csv
import datetime
import io
import json
import logging
import os
from airflow import models
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
from airflow.operators import dummy
from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteTableOperator
from airflow.utils.task_group import TaskGroup
# --------------------------------------------------------------------------------
# Set variables - Needed for the DEMO
# --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION")
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
GCP_REGION = os.environ.get("GCP_REGION")
LND_PRJ = os.environ.get("LND_PRJ")
LND_BQ = os.environ.get("LND_BQ")
LND_GCS = os.environ.get("LND_GCS")
LND_PS = os.environ.get("LND_PS")
LOD_PRJ = os.environ.get("LOD_PRJ")
LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET")
LOD_SA_DF = os.environ.get("LOD_SA_DF")
ORC_PRJ = os.environ.get("ORC_PRJ")
ORC_GCS = os.environ.get("ORC_GCS")
TRF_PRJ = os.environ.get("TRF_PRJ")
TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING")
TRF_NET_VPC = os.environ.get("TRF_NET_VPC")
TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET")
TRF_SA_DF = os.environ.get("TRF_SA_DF")
TRF_SA_BQ = os.environ.get("TRF_SA_BQ")
DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "")
DF_REGION = os.environ.get("GCP_REGION")
DF_ZONE = os.environ.get("GCP_REGION") + "-b"
# --------------------------------------------------------------------------------
# Set default arguments
# --------------------------------------------------------------------------------
# If you are running Airflow in more than one time zone
# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
# for best practices
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
default_args = {
'owner': 'airflow',
'start_date': yesterday,
'depends_on_past': False,
'email': [''],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': datetime.timedelta(minutes=5),
'dataflow_default_options': {
'location': DF_REGION,
'zone': DF_ZONE,
'stagingLocation': LOD_GCS_STAGING,
'tempLocation': LOD_GCS_STAGING + "/tmp",
'serviceAccountEmail': LOD_SA_DF,
'subnetwork': LOD_NET_SUBNET,
'ipConfiguration': "WORKER_IP_PRIVATE",
'kmsKeyName' : DF_KMS_KEY
},
}
# --------------------------------------------------------------------------------
# Main DAG
# --------------------------------------------------------------------------------
with models.DAG(
'delete_tables_dag',
default_args=default_args,
schedule_interval=None) as dag:
start = dummy.DummyOperator(
task_id='start',
trigger_rule='all_success'
)
end = dummy.DummyOperator(
task_id='end',
trigger_rule='all_success'
)
# Bigquery Tables deleted here for demo porpuse.
# Consider a dedicated pipeline or tool for a real life scenario.
with TaskGroup('delete_table') as delte_table:
delete_table_customers = BigQueryDeleteTableOperator(
task_id="delete_table_customers",
deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".customers",
impersonation_chain=[TRF_SA_DF]
)
delete_table_purchases = BigQueryDeleteTableOperator(
task_id="delete_table_purchases",
deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".purchases",
impersonation_chain=[TRF_SA_DF]
)
delete_table_customer_purchase_l1 = BigQueryDeleteTableOperator(
task_id="delete_table_customer_purchase_l1",
deletion_dataset_table=DTL_L1_PRJ+"."+DTL_L1_BQ_DATASET+".customer_purchase",
impersonation_chain=[TRF_SA_DF]
)
delete_table_customer_purchase_l2 = BigQueryDeleteTableOperator(
task_id="delete_table_customer_purchase_l2",
deletion_dataset_table=DTL_L2_PRJ+"."+DTL_L2_BQ_DATASET+".customer_purchase",
impersonation_chain=[TRF_SA_DF]
)
start >> delte_table >> end

View File

@ -98,7 +98,7 @@ output "demo_commands" {
03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/"
04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG."
05 = <<EOT
bq query --project_id=${module.lake-2-project.project_id} --use_legacy_sql=false 'SELECT * FROM `${module.lake-2-project.project_id}.${module.lake-2-bq-0.dataset_id}.customer_purchase` LIMIT 1000'"
bq query --project_id=${module.lake-2-project.project_id} --use_legacy_sql=false 'SELECT * EXCEPT (name, surname) FROM `${module.lake-2-project.project_id}.${module.lake-2-bq-0.dataset_id}.customer_purchase` LIMIT 1000'"
EOT
}
}

View File

@ -0,0 +1 @@
[{"mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID"}, {"mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": {"names": ["projects/yoyo-cmn/locations/eu/taxonomies/3505167253647667255/policyTags/2896949743213674289"]}}, {"mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": {"names": ["projects/yoyo-cmn/locations/eu/taxonomies/3505167253647667255/policyTags/2896949743213674289"]}}, {"mode": "REQUIRED", "name": "credicard", "type": "INTEGER", "description": "credicard", "policyTags": {"names": ["projects/yoyo-cmn/locations/eu/taxonomies/3505167253647667255/policyTags/1008821537023566954"]}}, {"mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp"}]

View File

@ -33,6 +33,17 @@ variable "composer_config" {
}
}
variable "data_catalog_tags" {
description = "List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format."
type = map(map(list(string)))
nullable = false
default = {
"3_Confidential" = null
"2_Private" = null
"1_Sensitive" = null
}
}
variable "data_force_destroy" {
description = "Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage."
type = bool

View File

@ -6,45 +6,53 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin) <br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
## Project <i>dev-data-dtl-0-0</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
## Project <i>dev-data-dtl-1-0</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dev-data-dtl-2-0</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dev-data-dtl-plg-0</i>
| members | roles |
|---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
## Project <i>dev-data-lnd-0</i>
@ -62,37 +70,40 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles |
|---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>service-426128559612</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project <i>dev-data-orc-0</i>
| members | roles |
|---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|<b>service-36960036774</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project <i>dev-data-trf-0</i>
| members | roles |
|---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>service-883871192228</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project <i>dev-net-spoke-0</i>
| members | roles |
|---|---|
|<b>36960036774</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|<b>PROJECT_CLOUD_SERVICES</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) <code>+</code>|
|<b>SERVICE_IDENTITY_container-engine-robot</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code>|
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code>|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|<b>service-36960036774</b><br><small><i>serviceAccount</i></small>|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code>|
|<b>service-426128559612</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|<b>service-883871192228</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|

View File

@ -50,6 +50,19 @@ Cloud KMS crypto keys can be configured wither from the [FAST security stage](..
To configure the use of Cloud KMS on resources, you have to specify the key id on the `service_encryption_keys` variable. Key locations should match resource locations.
## Data Catalog
[Data Catalog](https://cloud.google.com/data-catalog) helps you to document your data entry at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag template](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest to use `Tags` and `Tag templates`.
The default configuration will implement 3 tags:
- `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers.
- `2_Private`: policy tag for columns that include sensitive personal identifiable information (PII) information, such as a person's first name.
- `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit.
Anything that is not tagged is available to all users who have access to the data warehouse.
You can configure your tags and roles associated by configuring the `data_catalog_tags` variable. We suggest useing the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide to designing your tags structure and access pattern. By default, no groups has access to tagged data.
### VPC-SC
As is often the case in real-world configurations, [VPC-SC](https://cloud.google.com/vpc-service-controls) is needed to mitigate data exfiltration. VPC-SC can be configured from the [FAST security stage](../../02-security). This step is optional, but highly recomended, and depends on customer policies and security best practices.
@ -116,6 +129,12 @@ terraform init
terraform apply
```
## Demo pipeline
The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features.
You can find examples in the `[demo](../../../../examples/data-solutions/data-platform-foundations/demo)` folder.
<!-- TFDOC OPTS files:1 show_extra:1 -->
<!-- BEGIN TFDOC -->
@ -132,20 +151,21 @@ terraform apply
| name | description | type | required | default | producer |
|---|---|:---:|:---:|:---:|:---:|
| [billing_account](variables.tf#L17) | Billing account id and organization id ('nnnnnnnn' or null). | <code title="object&#40;&#123;&#10; id &#61; string&#10; organization_id &#61; number&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>00-globals</code> |
| [folder_ids](variables.tf#L45) | Folder to be used for the networking resources in folders/nnnn format. | <code title="object&#40;&#123;&#10; data-platform &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>01-resman</code> |
| [host_project_ids](variables.tf#L63) | Shared VPC project ids. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>02-networking</code> |
| [organization](variables.tf#L89) | Organization details. | <code title="object&#40;&#123;&#10; domain &#61; string&#10; id &#61; number&#10; customer_id &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>00-globals</code> |
| [prefix](variables.tf#L105) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | <code>string</code> | ✓ | | <code>00-globals</code> |
| [folder_ids](variables.tf#L56) | Folder to be used for the networking resources in folders/nnnn format. | <code title="object&#40;&#123;&#10; data-platform &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>01-resman</code> |
| [host_project_ids](variables.tf#L74) | Shared VPC project ids. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>02-networking</code> |
| [organization](variables.tf#L100) | Organization details. | <code title="object&#40;&#123;&#10; domain &#61; string&#10; id &#61; number&#10; customer_id &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>00-globals</code> |
| [prefix](variables.tf#L116) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | <code>string</code> | ✓ | | <code>00-globals</code> |
| [composer_config](variables.tf#L26) | | <code title="object&#40;&#123;&#10; node_count &#61; number&#10; airflow_version &#61; string&#10; env_variables &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; node_count &#61; 3&#10; airflow_version &#61; &#34;composer-1.17.5-airflow-2.1.4&#34;&#10; env_variables &#61; &#123;&#125;&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [data_force_destroy](variables.tf#L39) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | <code>bool</code> | | <code>false</code> | |
| [groups](variables.tf#L53) | Groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [network_config_composer](variables.tf#L71) | Network configurations to use for Composer. | <code title="object&#40;&#123;&#10; cloudsql_range &#61; string&#10; gke_master_range &#61; string&#10; gke_pods_name &#61; string&#10; gke_services_name &#61; string&#10; web_server_range &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; cloudsql_range &#61; &#34;192.168.254.0&#47;24&#34;&#10; gke_master_range &#61; &#34;192.168.255.0&#47;28&#34;&#10; gke_pods_name &#61; &#34;pods&#34;&#10; gke_services_name &#61; &#34;services&#34;&#10; web_server_range &#61; &#34;192.168.255.16&#47;28&#34;&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [outputs_location](variables.tf#L99) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | <code>string</code> | | <code>null</code> | |
| [project_services](variables.tf#L111) | List of core services enabled on all projects. | <code>list&#40;string&#41;</code> | | <code title="&#91;&#10; &#34;cloudresourcemanager.googleapis.com&#34;,&#10; &#34;iam.googleapis.com&#34;,&#10; &#34;serviceusage.googleapis.com&#34;,&#10; &#34;stackdriver.googleapis.com&#34;&#10;&#93;">&#91;&#8230;&#93;</code> | |
| [region](variables.tf#L122) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> | |
| [service_encryption_keys](variables.tf#L128) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object&#40;&#123;&#10; bq &#61; string&#10; composer &#61; string&#10; dataflow &#61; string&#10; storage &#61; string&#10; pubsub &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | |
| [subnet_self_links](variables.tf#L140) | Shared VPC subnet self links. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | <code>02-networking</code> |
| [vpc_self_links](variables.tf#L149) | Shared VPC self links. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | <code>02-networking</code> |
| [data_catalog_tags](variables.tf#L39) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map&#40;map&#40;list&#40;string&#41;&#41;&#41;</code> | | <code title="&#123;&#10; &#34;3_Confidential&#34; &#61; null&#10; &#34;2_Private&#34; &#61; null&#10; &#34;1_Sensitive&#34; &#61; null&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [data_force_destroy](variables.tf#L50) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | <code>bool</code> | | <code>false</code> | |
| [groups](variables.tf#L64) | Groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [network_config_composer](variables.tf#L82) | Network configurations to use for Composer. | <code title="object&#40;&#123;&#10; cloudsql_range &#61; string&#10; gke_master_range &#61; string&#10; gke_pods_name &#61; string&#10; gke_services_name &#61; string&#10; web_server_range &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; cloudsql_range &#61; &#34;192.168.254.0&#47;24&#34;&#10; gke_master_range &#61; &#34;192.168.255.0&#47;28&#34;&#10; gke_pods_name &#61; &#34;pods&#34;&#10; gke_services_name &#61; &#34;services&#34;&#10; web_server_range &#61; &#34;192.168.255.16&#47;28&#34;&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [outputs_location](variables.tf#L110) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | <code>string</code> | | <code>null</code> | |
| [project_services](variables.tf#L122) | List of core services enabled on all projects. | <code>list&#40;string&#41;</code> | | <code title="&#91;&#10; &#34;cloudresourcemanager.googleapis.com&#34;,&#10; &#34;iam.googleapis.com&#34;,&#10; &#34;serviceusage.googleapis.com&#34;,&#10; &#34;stackdriver.googleapis.com&#34;&#10;&#93;">&#91;&#8230;&#93;</code> | |
| [region](variables.tf#L133) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> | |
| [service_encryption_keys](variables.tf#L139) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object&#40;&#123;&#10; bq &#61; string&#10; composer &#61; string&#10; dataflow &#61; string&#10; storage &#61; string&#10; pubsub &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | |
| [subnet_self_links](variables.tf#L151) | Shared VPC subnet self links. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | <code>02-networking</code> |
| [vpc_self_links](variables.tf#L160) | Shared VPC self links. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | <code>02-networking</code> |
## Outputs

View File

@ -21,6 +21,7 @@ module "data-platform" {
billing_account_id = var.billing_account.id
composer_config = var.composer_config
data_force_destroy = var.data_force_destroy
data_catalog_tags = var.data_catalog_tags
folder_id = var.folder_ids.data-platform
groups = var.groups
network_config = {

View File

@ -36,6 +36,17 @@ variable "composer_config" {
}
}
variable "data_catalog_tags" {
description = "List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format."
type = map(map(list(string)))
nullable = false
default = {
"3_Confidential" = null
"2_Private" = null
"1_Sensitive" = null
}
}
variable "data_force_destroy" {
description = "Flag to set 'force_destroy' on data services like BigQery or Cloud Storage."
type = bool

View File

@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture')
def test_resources(e2e_plan_runner):
"Test that plan works and the numbers of resources is as expected."
modules, resources = e2e_plan_runner(FIXTURES_DIR)
assert len(modules) == 40
assert len(resources) == 296
assert len(modules) == 41
assert len(resources) == 313

View File

@ -65,10 +65,22 @@ def get_bindings(resources, prefix=None, folders=None):
member_type, _, member_id = member.partition(':')
if member_type == 'user':
continue
member_id = member_id.rpartition('@')[0]
member_id, member_domain = member_id.split('@', 1)
# Handle Cloud Services Service Account
if member_domain == 'cloudservices.gserviceaccount.com':
member_id = "PROJECT_CLOUD_SERVICES"
# Handle Cloud Service Identity Service Acocunt
if re.match("^service-\d{8}", member_id):
member_id = "SERVICE_IDENTITY_" + member_domain.split(".", 1)[0]
# Handle BQ Cloud Service Identity Service Acocunt
if re.match("^bq-\d{8}", member_id):
member_id = "IDENTITY_" + member_domain.split(".", 1)[0]
resource_type_output = "Service Identity - " + resource_type
else:
resource_type_output = resource_type
if prefix and member_id.startswith(prefix):
member_id = member_id[len(prefix) + 1:]
yield Binding(authoritative, resource_type, resource_id, role,
yield Binding(authoritative, resource_type_output, resource_id, role,
member_type, member_id, conditions)