Merge pull request #607 from GoogleCloudPlatform/lcaggio/dp-dc-policy-tag

[Data Platform] Add Data Catalog Policy tag
This commit is contained in:
lcaggio 2022-04-04 15:48:53 +02:00 committed by GitHub
commit 56b89211a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 719 additions and 99 deletions

View File

@ -67,6 +67,7 @@ resource "google_composer_environment" "orch-cmp-0" {
env_variables = merge( env_variables = merge(
var.composer_config.env_variables, { var.composer_config.env_variables, {
BQ_LOCATION = var.location BQ_LOCATION = var.location
DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}")
DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "") DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "")
DTL_L0_PRJ = module.lake-0-project.project_id DTL_L0_PRJ = module.lake-0-project.project_id
DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id

View File

@ -23,6 +23,7 @@ locals {
(local.groups.data-analysts) = [ (local.groups.data-analysts) = [
"roles/bigquery.dataViewer", "roles/bigquery.dataViewer",
"roles/bigquery.jobUser", "roles/bigquery.jobUser",
"roles/bigquery.metadataViewer",
"roles/bigquery.user", "roles/bigquery.user",
"roles/datacatalog.viewer", "roles/datacatalog.viewer",
"roles/datacatalog.tagTemplateViewer", "roles/datacatalog.tagTemplateViewer",
@ -37,6 +38,7 @@ locals {
(local.groups.data-analysts) = [ (local.groups.data-analysts) = [
"roles/bigquery.dataEditor", "roles/bigquery.dataEditor",
"roles/bigquery.jobUser", "roles/bigquery.jobUser",
"roles/bigquery.metadataViewer",
"roles/bigquery.user", "roles/bigquery.user",
"roles/datacatalog.viewer", "roles/datacatalog.viewer",
"roles/datacatalog.tagTemplateViewer", "roles/datacatalog.tagTemplateViewer",
@ -44,7 +46,7 @@ locals {
] ]
} }
lake_0_iam = { lake_0_iam = {
"roles/bigquery.dataEditor" = [ "roles/bigquery.dataOwner" = [
module.load-sa-df-0.iam_email, module.load-sa-df-0.iam_email,
module.transf-sa-df-0.iam_email, module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email, module.transf-sa-bq-0.iam_email,
@ -52,18 +54,24 @@ locals {
"roles/bigquery.jobUser" = [ "roles/bigquery.jobUser" = [
module.load-sa-df-0.iam_email, module.load-sa-df-0.iam_email,
] ]
"roles/datacatalog.categoryAdmin" = [
module.transf-sa-bq-0.iam_email
]
"roles/storage.objectCreator" = [ "roles/storage.objectCreator" = [
module.load-sa-df-0.iam_email, module.load-sa-df-0.iam_email,
] ]
} }
lake_iam = { lake_iam = {
"roles/bigquery.dataEditor" = [ "roles/bigquery.dataOwner" = [
module.transf-sa-df-0.iam_email, module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email, module.transf-sa-bq-0.iam_email,
] ]
"roles/bigquery.jobUser" = [ "roles/bigquery.jobUser" = [
module.transf-sa-bq-0.iam_email, module.transf-sa-bq-0.iam_email,
] ]
"roles/datacatalog.categoryAdmin" = [
module.load-sa-df-0.iam_email
]
"roles/storage.objectCreator" = [ "roles/storage.objectCreator" = [
module.transf-sa-df-0.iam_email, module.transf-sa-df-0.iam_email,
] ]

View File

@ -21,6 +21,9 @@ module "common-project" {
prefix = var.prefix prefix = var.prefix
name = "cmn${local.project_suffix}" name = "cmn${local.project_suffix}"
group_iam = { group_iam = {
(local.groups.data-analysts) = [
"roles/datacatalog.viewer",
]
(local.groups.data-engineers) = [ (local.groups.data-engineers) = [
"roles/dlp.reader", "roles/dlp.reader",
"roles/dlp.user", "roles/dlp.user",
@ -28,6 +31,7 @@ module "common-project" {
] ]
(local.groups.data-security) = [ (local.groups.data-security) = [
"roles/dlp.admin", "roles/dlp.admin",
"roles/datacatalog.admin"
] ]
} }
iam = { iam = {
@ -35,6 +39,17 @@ module "common-project" {
module.load-sa-df-0.iam_email, module.load-sa-df-0.iam_email,
module.transf-sa-df-0.iam_email module.transf-sa-df-0.iam_email
] ]
"roles/datacatalog.viewer" = [
module.load-sa-df-0.iam_email,
module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email
]
"roles/datacatalog.categoryFineGrainedReader" = [
module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email,
# Uncomment if you want to grant access to `data-analyst` to all columns tagged.
# local.groups_iam.data-analysts
]
} }
services = concat(var.project_services, [ services = concat(var.project_services, [
"datacatalog.googleapis.com", "datacatalog.googleapis.com",
@ -42,6 +57,16 @@ module "common-project" {
]) ])
} }
# Data Catalog Policy tag
module "common-datacatalog" {
source = "../../../modules/data-catalog-policy-tag"
project_id = module.common-project.project_id
name = "${var.prefix}-datacatalog-policy-tags"
location = var.location
tags = var.data_catalog_tags
}
# To create KMS keys in the common projet: uncomment this section and assigne key links accondingly in local.service_encryption_keys variable # To create KMS keys in the common projet: uncomment this section and assigne key links accondingly in local.service_encryption_keys variable
# module "cmn-kms-0" { # module "cmn-kms-0" {

View File

@ -6,45 +6,53 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) | |<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin) <br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | |<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | |<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
## Project <i>dtl-0</i> ## Project <i>dtl-0</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | |<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | |<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | |<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
## Project <i>dtl-1</i> ## Project <i>dtl-1</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dtl-2</i> ## Project <i>dtl-2</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dtl-plg</i> ## Project <i>dtl-plg</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
## Project <i>lnd</i> ## Project <i>lnd</i>
@ -62,6 +70,8 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | |<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
@ -69,7 +79,9 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | |<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
@ -79,6 +91,8 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | |<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |

View File

@ -154,6 +154,19 @@ Cloud Data Loss Prevention resources and templates should be stored in the secur
You can find more details and best practices on using DLP to De-identification and re-identification of PII in large-scale datasets in the [GCP documentation](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp). You can find more details and best practices on using DLP to De-identification and re-identification of PII in large-scale datasets in the [GCP documentation](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp).
## Data Catalog
[Data Catalog](https://cloud.google.com/data-catalog) helps you to document your data entry at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag template](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest to use `Tags` and `Tag templates`.
The default configuration will implement 3 tags:
- `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers.
- `2_Private`: policy tag for columns that include sensitive personal identifiable information (PII) information, such as a person's first name.
- `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit.
Anything that is not tagged is available to all users who have access to the data warehouse.
For the porpuse of the example no groups has access to tagged data. You can configure your tags and roles associated by configuring the `data_catalog_tags` variable. We suggest useing the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide to designing your tags structure and access pattern.
## How to run this script ## How to run this script
To deploy this example on your GCP organization, you will need To deploy this example on your GCP organization, you will need
@ -207,17 +220,10 @@ To do this, you need to remove IAM binging at project-level for the `data-analys
## Demo pipeline ## Demo pipeline
The application layer is out of scope of this script, but as a demo, it is provided with a Cloud Composer DAG to mode data from the `landing` area to the `DataLake L2` dataset. The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features.
Just follow the commands you find in the `demo_commands` Terraform output, go in the Cloud Composer UI and run the `data_pipeline_dag`. You can find examples in the `[demo](./demo)` folder.
Description of commands:
- 01: copy sample data to a `landing` Cloud Storage bucket impersonating the `load` service account.
- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account.
- 04: Open the Cloud Composer Airflow UI and run the imported DAG.
- 05: Run the BigQuery query to see results.
<!-- BEGIN TFDOC --> <!-- BEGIN TFDOC -->
## Variables ## Variables
@ -225,17 +231,18 @@ Description of commands:
| name | description | type | required | default | | name | description | type | required | default |
|---|---|:---:|:---:|:---:| |---|---|:---:|:---:|:---:|
| [billing_account_id](variables.tf#L17) | Billing account id. | <code>string</code> | ✓ | | | [billing_account_id](variables.tf#L17) | Billing account id. | <code>string</code> | ✓ | |
| [folder_id](variables.tf#L42) | Folder to be used for the networking resources in folders/nnnn format. | <code>string</code> | ✓ | | | [folder_id](variables.tf#L53) | Folder to be used for the networking resources in folders/nnnn format. | <code>string</code> | ✓ | |
| [organization_domain](variables.tf#L87) | Organization domain. | <code>string</code> | ✓ | | | [organization_domain](variables.tf#L98) | Organization domain. | <code>string</code> | ✓ | |
| [prefix](variables.tf#L92) | Unique prefix used for resource names. | <code>string</code> | ✓ | | | [prefix](variables.tf#L103) | Unique prefix used for resource names. | <code>string</code> | ✓ | |
| [composer_config](variables.tf#L22) | Cloud Composer config. | <code title="object&#40;&#123;&#10; node_count &#61; number&#10; airflow_version &#61; string&#10; env_variables &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; node_count &#61; 3&#10; airflow_version &#61; &#34;composer-1.17.5-airflow-2.1.4&#34;&#10; env_variables &#61; &#123;&#125;&#10;&#125;">&#123;&#8230;&#125;</code> | | [composer_config](variables.tf#L22) | Cloud Composer config. | <code title="object&#40;&#123;&#10; node_count &#61; number&#10; airflow_version &#61; string&#10; env_variables &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; node_count &#61; 3&#10; airflow_version &#61; &#34;composer-1.17.5-airflow-2.1.4&#34;&#10; env_variables &#61; &#123;&#125;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [data_force_destroy](variables.tf#L36) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> | | [data_catalog_tags](variables.tf#L36) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map&#40;map&#40;list&#40;string&#41;&#41;&#41;</code> | | <code title="&#123;&#10; &#34;3_Confidential&#34; &#61; null&#10; &#34;2_Private&#34; &#61; null&#10; &#34;1_Sensitive&#34; &#61; null&#10;&#125;">&#123;&#8230;&#125;</code> |
| [groups](variables.tf#L53) | User groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> | | [data_force_destroy](variables.tf#L47) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
| [location](variables.tf#L47) | Location used for multi-regional resources. | <code>string</code> | | <code>&#34;eu&#34;</code> | | [groups](variables.tf#L64) | User groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [network_config](variables.tf#L63) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | <code title="object&#40;&#123;&#10; host_project &#61; string&#10; network_self_link &#61; string&#10; subnet_self_links &#61; object&#40;&#123;&#10; load &#61; string&#10; transformation &#61; string&#10; orchestration &#61; string&#10; &#125;&#41;&#10; composer_ip_ranges &#61; object&#40;&#123;&#10; cloudsql &#61; string&#10; gke_master &#61; string&#10; web_server &#61; string&#10; &#125;&#41;&#10; composer_secondary_ranges &#61; object&#40;&#123;&#10; pods &#61; string&#10; services &#61; string&#10; &#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | | [location](variables.tf#L58) | Location used for multi-regional resources. | <code>string</code> | | <code>&#34;eu&#34;</code> |
| [project_services](variables.tf#L97) | List of core services enabled on all projects. | <code>list&#40;string&#41;</code> | | <code title="&#91;&#10; &#34;cloudresourcemanager.googleapis.com&#34;,&#10; &#34;iam.googleapis.com&#34;,&#10; &#34;serviceusage.googleapis.com&#34;,&#10; &#34;stackdriver.googleapis.com&#34;&#10;&#93;">&#91;&#8230;&#93;</code> | | [network_config](variables.tf#L74) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | <code title="object&#40;&#123;&#10; host_project &#61; string&#10; network_self_link &#61; string&#10; subnet_self_links &#61; object&#40;&#123;&#10; load &#61; string&#10; transformation &#61; string&#10; orchestration &#61; string&#10; &#125;&#41;&#10; composer_ip_ranges &#61; object&#40;&#123;&#10; cloudsql &#61; string&#10; gke_master &#61; string&#10; web_server &#61; string&#10; &#125;&#41;&#10; composer_secondary_ranges &#61; object&#40;&#123;&#10; pods &#61; string&#10; services &#61; string&#10; &#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> |
| [project_suffix](variables.tf#L108) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> | | [project_services](variables.tf#L108) | List of core services enabled on all projects. | <code>list&#40;string&#41;</code> | | <code title="&#91;&#10; &#34;cloudresourcemanager.googleapis.com&#34;,&#10; &#34;iam.googleapis.com&#34;,&#10; &#34;serviceusage.googleapis.com&#34;,&#10; &#34;stackdriver.googleapis.com&#34;&#10;&#93;">&#91;&#8230;&#93;</code> |
| [region](variables.tf#L114) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> | | [project_suffix](variables.tf#L119) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
| [region](variables.tf#L125) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
## Outputs ## Outputs
@ -254,13 +261,6 @@ Description of commands:
Features to add in future releases: Features to add in future releases:
- Add support for Column level access on BigQuery
- Add example templates for Data Catalog
- Add example on how to use Cloud Data Loss Prevention - Add example on how to use Cloud Data Loss Prevention
- Add solution to handle Tables, Views, and Authorized Views lifecycle - Add solution to handle Tables, Views, and Authorized Views lifecycle
- Add solution to handle Metadata lifecycle - Add solution to handle Metadata lifecycle
## To Test/Fix
- Composer require "Require OS Login" not enforced
- External Shared-VPC

View File

@ -1,3 +1,32 @@
# Data ingestion Demo # Data ingestion Demo
In this folder you can find an example to ingest data on the `data platfoem` instantiated in [here](../). See details in the [README.m](../#demo-pipeline) to run the demo. In this folder, you can find an example to ingest data on the `data platform` instantiated [here](../).
The example is not intended to be a production-ready code.
## Demo use case
The demo imports purchase data generated by a store.
## Input files
Data are uploaded to the `landing` GCS bucket. File structure:
- `customers.csv`: Comma separate value with customer information in the following format: Customer ID, Name, Surname, Registration Timestamp
- `purchases.csv`: Comma separate value with customer information in the following format: Item ID, Customer ID, Item, Item price, Purchase Timestamp
## Data processing pipelines
Different data pipelines are provided to highlight different features and patterns. For the purpose of the example, a single pipeline handle all data lifecycles. When adapting them to your real use case, you may want to evaluate the option to handle each functional step on a separate pipeline or a dedicated tool. For example, you may want to use `Dataform` to handle data schemas lifecycle.
Below you can find a description of each example:
- Simple import data: [`datapipeline.py`](./datapipeline.py) is a simple pipeline to import provided data from the `landing` Google Cloud Storage bucket to the Data Hub L2 layer joining `customers` and `purchases` tables into `customerpurchase` table.
- Import data with Policy Tags: [`datapipeline_dc_tags.py`](./datapipeline.py) imports provided data from `landing` bucket to the Data Hub L2 layer protecting sensitive data using Data Catalog policy Tags.
- Delete tables: [`delete_table.py`](./delete_table.py) deletes BigQuery tables created by import pipelines.
## Runnin the demo
To run demo examples, please follow the following steps:
- 01: copy sample data to the `landing` Cloud Storage bucket impersonating the `load` service account.
- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account.
- 04: Open the Cloud Composer Airflow UI and run the imported DAG.
- 05: Run the BigQuery query to see results.
You can find pre-computed commands in the `demo_commands` output variable of the deployed terraform [data pipeline](../).

View File

@ -19,18 +19,21 @@
import csv import csv
import datetime import datetime
import io import io
import json
import logging import logging
import os import os
from airflow import models from airflow import models
from airflow.contrib.operators.dataflow_operator import DataflowTemplateOperator from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
from airflow.operators import dummy from airflow.operators import dummy
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator
from airflow.utils.task_group import TaskGroup
# -------------------------------------------------------------------------------- # --------------------------------------------------------------------------------
# Set variables # Set variables - Needed for the DEMO
# ------------------------------------------------------------ # --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION") BQ_LOCATION = os.environ.get("BQ_LOCATION")
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ") DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET") DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS") DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
@ -84,7 +87,6 @@ default_args = {
'retries': 1, 'retries': 1,
'retry_delay': datetime.timedelta(minutes=5), 'retry_delay': datetime.timedelta(minutes=5),
'dataflow_default_options': { 'dataflow_default_options': {
'project': LOD_PRJ,
'location': DF_REGION, 'location': DF_REGION,
'zone': DF_ZONE, 'zone': DF_ZONE,
'stagingLocation': LOD_GCS_STAGING, 'stagingLocation': LOD_GCS_STAGING,
@ -114,9 +116,13 @@ with models.DAG(
trigger_rule='all_success' trigger_rule='all_success'
) )
customers_import = DataflowTemplateOperator( # Bigquery Tables automatically created for demo porpuse.
task_id="dataflow_customer_import", # Consider a dedicated pipeline or tool for a real life scenario.
customers_import = DataflowTemplatedJobStartOperator(
task_id="dataflow_customers_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery", template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
project_id=LOD_PRJ,
location=DF_REGION,
parameters={ parameters={
"javascriptTextTransformFunctionName": "transform", "javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/customers_schema.json", "JSONPath": ORC_GCS + "/customers_schema.json",
@ -127,9 +133,11 @@ with models.DAG(
}, },
) )
purchases_import = DataflowTemplateOperator( purchases_import = DataflowTemplatedJobStartOperator(
task_id="dataflow_purchases_import", task_id="dataflow_purchases_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery", template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
project_id=LOD_PRJ,
location=DF_REGION,
parameters={ parameters={
"javascriptTextTransformFunctionName": "transform", "javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/purchases_schema.json", "JSONPath": ORC_GCS + "/purchases_schema.json",
@ -180,13 +188,13 @@ with models.DAG(
'jobType':'QUERY', 'jobType':'QUERY',
'query':{ 'query':{
'query':"""SELECT 'query':"""SELECT
customer_id, customer_id,
purchase_id, purchase_id,
name, name,
surname, surname,
item, item,
price, price,
timestamp timestamp
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase` FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ), """.format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
'destinationTable':{ 'destinationTable':{
@ -201,4 +209,4 @@ with models.DAG(
impersonation_chain=[TRF_SA_BQ] impersonation_chain=[TRF_SA_BQ]
) )
start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end

View File

@ -0,0 +1,322 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------------
# Load The Dependencies
# --------------------------------------------------------------------------------
import csv
import datetime
import io
import json
import logging
import os
from airflow import models
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
from airflow.operators import dummy
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator
from airflow.utils.task_group import TaskGroup
# --------------------------------------------------------------------------------
# Set variables - Needed for the DEMO
# --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION")
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
GCP_REGION = os.environ.get("GCP_REGION")
LND_PRJ = os.environ.get("LND_PRJ")
LND_BQ = os.environ.get("LND_BQ")
LND_GCS = os.environ.get("LND_GCS")
LND_PS = os.environ.get("LND_PS")
LOD_PRJ = os.environ.get("LOD_PRJ")
LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET")
LOD_SA_DF = os.environ.get("LOD_SA_DF")
ORC_PRJ = os.environ.get("ORC_PRJ")
ORC_GCS = os.environ.get("ORC_GCS")
TRF_PRJ = os.environ.get("TRF_PRJ")
TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING")
TRF_NET_VPC = os.environ.get("TRF_NET_VPC")
TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET")
TRF_SA_DF = os.environ.get("TRF_SA_DF")
TRF_SA_BQ = os.environ.get("TRF_SA_BQ")
DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "")
DF_REGION = os.environ.get("GCP_REGION")
DF_ZONE = os.environ.get("GCP_REGION") + "-b"
# --------------------------------------------------------------------------------
# Set default arguments
# --------------------------------------------------------------------------------
# If you are running Airflow in more than one time zone
# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
# for best practices
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
default_args = {
'owner': 'airflow',
'start_date': yesterday,
'depends_on_past': False,
'email': [''],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': datetime.timedelta(minutes=5),
'dataflow_default_options': {
'location': DF_REGION,
'zone': DF_ZONE,
'stagingLocation': LOD_GCS_STAGING,
'tempLocation': LOD_GCS_STAGING + "/tmp",
'serviceAccountEmail': LOD_SA_DF,
'subnetwork': LOD_NET_SUBNET,
'ipConfiguration': "WORKER_IP_PRIVATE",
'kmsKeyName' : DF_KMS_KEY
},
}
# --------------------------------------------------------------------------------
# Main DAG
# --------------------------------------------------------------------------------
with models.DAG(
'data_pipeline_dc_tags_dag',
default_args=default_args,
schedule_interval=None) as dag:
start = dummy.DummyOperator(
task_id='start',
trigger_rule='all_success'
)
end = dummy.DummyOperator(
task_id='end',
trigger_rule='all_success'
)
# Bigquery Tables created here for demo porpuse.
# Consider a dedicated pipeline or tool for a real life scenario.
with TaskGroup('upsert_table') as upsert_table:
upsert_table_customers = BigQueryUpsertTableOperator(
task_id="upsert_table_customers",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
impersonation_chain=[TRF_SA_DF],
table_resource={
"tableReference": {"tableId": "customers"},
},
)
upsert_table_purchases = BigQueryUpsertTableOperator(
task_id="upsert_table_purchases",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
impersonation_chain=[TRF_SA_BQ],
table_resource={
"tableReference": {"tableId": "purchases"}
},
)
upsert_table_customer_purchase_l1 = BigQueryUpsertTableOperator(
task_id="upsert_table_customer_purchase_l1",
project_id=DTL_L1_PRJ,
dataset_id=DTL_L1_BQ_DATASET,
impersonation_chain=[TRF_SA_BQ],
table_resource={
"tableReference": {"tableId": "customer_purchase"}
},
)
upsert_table_customer_purchase_l2 = BigQueryUpsertTableOperator(
task_id="upsert_table_customer_purchase_l2",
project_id=DTL_L2_PRJ,
dataset_id=DTL_L2_BQ_DATASET,
impersonation_chain=[TRF_SA_BQ],
table_resource={
"tableReference": {"tableId": "customer_purchase"}
},
)
# Bigquery Tables schema defined here for demo porpuse.
# Consider a dedicated pipeline or tool for a real life scenario.
with TaskGroup('update_schema_table') as update_schema_table:
update_table_schema_customers = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customers",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
table_id="customers",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
schema_fields_updates=[
{ "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
{ "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
]
)
update_table_schema_customers = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_purchases",
project_id=DTL_L0_PRJ,
dataset_id=DTL_L0_BQ_DATASET,
table_id="purchases",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
schema_fields_updates=[
{ "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
{ "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
]
)
update_table_schema_customer_purchase_l1 = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customer_purchase_l1",
project_id=DTL_L1_PRJ,
dataset_id=DTL_L1_BQ_DATASET,
table_id="customer_purchase",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
schema_fields_updates=[
{ "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
{ "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
{ "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
{ "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
]
)
update_table_schema_customer_purchase_l2 = BigQueryUpdateTableSchemaOperator(
task_id="update_table_schema_customer_purchase_l2",
project_id=DTL_L2_PRJ,
dataset_id=DTL_L2_BQ_DATASET,
table_id="customer_purchase",
impersonation_chain=[TRF_SA_BQ],
include_policy_tags=True,
schema_fields_updates=[
{ "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" },
{ "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
{ "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
{ "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
{ "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
]
)
customers_import = DataflowTemplatedJobStartOperator(
task_id="dataflow_customers_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
project_id=LOD_PRJ,
location=DF_REGION,
parameters={
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/customers_schema.json",
"javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js",
"inputFilePattern": LND_GCS + "/customers.csv",
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".customers",
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
},
)
purchases_import = DataflowTemplatedJobStartOperator(
task_id="dataflow_purchases_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
project_id=LOD_PRJ,
location=DF_REGION,
parameters={
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/purchases_schema.json",
"javascriptTextTransformGcsPath": ORC_GCS + "/purchases_udf.js",
"inputFilePattern": LND_GCS + "/purchases.csv",
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".purchases",
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
},
)
join_customer_purchase = BigQueryInsertJobOperator(
task_id='bq_join_customer_purchase',
gcp_conn_id='bigquery_default',
project_id=TRF_PRJ,
location=BQ_LOCATION,
configuration={
'jobType':'QUERY',
'query':{
'query':"""SELECT
c.id as customer_id,
p.id as purchase_id,
c.name as name,
c.surname as surname,
p.item as item,
p.price as price,
p.timestamp as timestamp
FROM `{dtl_0_prj}.{dtl_0_dataset}.customers` c
JOIN `{dtl_0_prj}.{dtl_0_dataset}.purchases` p ON c.id = p.customer_id
""".format(dtl_0_prj=DTL_L0_PRJ, dtl_0_dataset=DTL_L0_BQ_DATASET, ),
'destinationTable':{
'projectId': DTL_L1_PRJ,
'datasetId': DTL_L1_BQ_DATASET,
'tableId': 'customer_purchase'
},
'writeDisposition':'WRITE_TRUNCATE',
"useLegacySql": False
}
},
impersonation_chain=[TRF_SA_BQ]
)
l2_customer_purchase = BigQueryInsertJobOperator(
task_id='bq_l2_customer_purchase',
gcp_conn_id='bigquery_default',
project_id=TRF_PRJ,
location=BQ_LOCATION,
configuration={
'jobType':'QUERY',
'query':{
'query':"""SELECT
customer_id,
purchase_id,
name,
surname,
item,
price,
timestamp
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
'destinationTable':{
'projectId': DTL_L2_PRJ,
'datasetId': DTL_L2_BQ_DATASET,
'tableId': 'customer_purchase'
},
'writeDisposition':'WRITE_TRUNCATE',
"useLegacySql": False
}
},
impersonation_chain=[TRF_SA_BQ]
)
start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end

View File

@ -0,0 +1,146 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------------
# Load The Dependencies
# --------------------------------------------------------------------------------
import csv
import datetime
import io
import json
import logging
import os
from airflow import models
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
from airflow.operators import dummy
from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteTableOperator
from airflow.utils.task_group import TaskGroup
# --------------------------------------------------------------------------------
# Set variables - Needed for the DEMO
# --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION")
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
GCP_REGION = os.environ.get("GCP_REGION")
LND_PRJ = os.environ.get("LND_PRJ")
LND_BQ = os.environ.get("LND_BQ")
LND_GCS = os.environ.get("LND_GCS")
LND_PS = os.environ.get("LND_PS")
LOD_PRJ = os.environ.get("LOD_PRJ")
LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET")
LOD_SA_DF = os.environ.get("LOD_SA_DF")
ORC_PRJ = os.environ.get("ORC_PRJ")
ORC_GCS = os.environ.get("ORC_GCS")
TRF_PRJ = os.environ.get("TRF_PRJ")
TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING")
TRF_NET_VPC = os.environ.get("TRF_NET_VPC")
TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET")
TRF_SA_DF = os.environ.get("TRF_SA_DF")
TRF_SA_BQ = os.environ.get("TRF_SA_BQ")
DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "")
DF_REGION = os.environ.get("GCP_REGION")
DF_ZONE = os.environ.get("GCP_REGION") + "-b"
# --------------------------------------------------------------------------------
# Set default arguments
# --------------------------------------------------------------------------------
# If you are running Airflow in more than one time zone
# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
# for best practices
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
default_args = {
'owner': 'airflow',
'start_date': yesterday,
'depends_on_past': False,
'email': [''],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': datetime.timedelta(minutes=5),
'dataflow_default_options': {
'location': DF_REGION,
'zone': DF_ZONE,
'stagingLocation': LOD_GCS_STAGING,
'tempLocation': LOD_GCS_STAGING + "/tmp",
'serviceAccountEmail': LOD_SA_DF,
'subnetwork': LOD_NET_SUBNET,
'ipConfiguration': "WORKER_IP_PRIVATE",
'kmsKeyName' : DF_KMS_KEY
},
}
# --------------------------------------------------------------------------------
# Main DAG
# --------------------------------------------------------------------------------
with models.DAG(
'delete_tables_dag',
default_args=default_args,
schedule_interval=None) as dag:
start = dummy.DummyOperator(
task_id='start',
trigger_rule='all_success'
)
end = dummy.DummyOperator(
task_id='end',
trigger_rule='all_success'
)
# Bigquery Tables deleted here for demo porpuse.
# Consider a dedicated pipeline or tool for a real life scenario.
with TaskGroup('delete_table') as delte_table:
delete_table_customers = BigQueryDeleteTableOperator(
task_id="delete_table_customers",
deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".customers",
impersonation_chain=[TRF_SA_DF]
)
delete_table_purchases = BigQueryDeleteTableOperator(
task_id="delete_table_purchases",
deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".purchases",
impersonation_chain=[TRF_SA_DF]
)
delete_table_customer_purchase_l1 = BigQueryDeleteTableOperator(
task_id="delete_table_customer_purchase_l1",
deletion_dataset_table=DTL_L1_PRJ+"."+DTL_L1_BQ_DATASET+".customer_purchase",
impersonation_chain=[TRF_SA_DF]
)
delete_table_customer_purchase_l2 = BigQueryDeleteTableOperator(
task_id="delete_table_customer_purchase_l2",
deletion_dataset_table=DTL_L2_PRJ+"."+DTL_L2_BQ_DATASET+".customer_purchase",
impersonation_chain=[TRF_SA_DF]
)
start >> delte_table >> end

View File

@ -98,7 +98,7 @@ output "demo_commands" {
03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/" 03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/"
04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG." 04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG."
05 = <<EOT 05 = <<EOT
bq query --project_id=${module.lake-2-project.project_id} --use_legacy_sql=false 'SELECT * FROM `${module.lake-2-project.project_id}.${module.lake-2-bq-0.dataset_id}.customer_purchase` LIMIT 1000'" bq query --project_id=${module.lake-2-project.project_id} --use_legacy_sql=false 'SELECT * EXCEPT (name, surname) FROM `${module.lake-2-project.project_id}.${module.lake-2-bq-0.dataset_id}.customer_purchase` LIMIT 1000'"
EOT EOT
} }
} }

View File

@ -0,0 +1 @@
[{"mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID"}, {"mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": {"names": ["projects/yoyo-cmn/locations/eu/taxonomies/3505167253647667255/policyTags/2896949743213674289"]}}, {"mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": {"names": ["projects/yoyo-cmn/locations/eu/taxonomies/3505167253647667255/policyTags/2896949743213674289"]}}, {"mode": "REQUIRED", "name": "credicard", "type": "INTEGER", "description": "credicard", "policyTags": {"names": ["projects/yoyo-cmn/locations/eu/taxonomies/3505167253647667255/policyTags/1008821537023566954"]}}, {"mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp"}]

View File

@ -33,6 +33,17 @@ variable "composer_config" {
} }
} }
variable "data_catalog_tags" {
description = "List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format."
type = map(map(list(string)))
nullable = false
default = {
"3_Confidential" = null
"2_Private" = null
"1_Sensitive" = null
}
}
variable "data_force_destroy" { variable "data_force_destroy" {
description = "Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage." description = "Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage."
type = bool type = bool

View File

@ -6,45 +6,53 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) | |<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin) <br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | |<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | |<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
## Project <i>dev-data-dtl-0-0</i> ## Project <i>dev-data-dtl-0-0</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | |<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | |<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | |<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
## Project <i>dev-data-dtl-1-0</i> ## Project <i>dev-data-dtl-1-0</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dev-data-dtl-2-0</i> ## Project <i>dev-data-dtl-2-0</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project <i>dev-data-dtl-plg-0</i> ## Project <i>dev-data-dtl-plg-0</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
## Project <i>dev-data-lnd-0</i> ## Project <i>dev-data-lnd-0</i>
@ -62,37 +70,40 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | |<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>service-426128559612</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project <i>dev-data-orc-0</i> ## Project <i>dev-data-orc-0</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | |<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|<b>service-36960036774</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project <i>dev-data-trf-0</i> ## Project <i>dev-data-trf-0</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | |<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | |<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|<b>service-883871192228</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project <i>dev-net-spoke-0</i> ## Project <i>dev-net-spoke-0</i>
| members | roles | | members | roles |
|---|---| |---|---|
|<b>36960036774</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>| |<b>PROJECT_CLOUD_SERVICES</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) <code>+</code>|
|<b>SERVICE_IDENTITY_container-engine-robot</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code>|
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code>|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>| |<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>| |<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|<b>service-36960036774</b><br><small><i>serviceAccount</i></small>|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code>|
|<b>service-426128559612</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|<b>service-883871192228</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|

View File

@ -50,6 +50,19 @@ Cloud KMS crypto keys can be configured wither from the [FAST security stage](..
To configure the use of Cloud KMS on resources, you have to specify the key id on the `service_encryption_keys` variable. Key locations should match resource locations. To configure the use of Cloud KMS on resources, you have to specify the key id on the `service_encryption_keys` variable. Key locations should match resource locations.
## Data Catalog
[Data Catalog](https://cloud.google.com/data-catalog) helps you to document your data entry at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag template](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest to use `Tags` and `Tag templates`.
The default configuration will implement 3 tags:
- `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers.
- `2_Private`: policy tag for columns that include sensitive personal identifiable information (PII) information, such as a person's first name.
- `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit.
Anything that is not tagged is available to all users who have access to the data warehouse.
You can configure your tags and roles associated by configuring the `data_catalog_tags` variable. We suggest useing the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide to designing your tags structure and access pattern. By default, no groups has access to tagged data.
### VPC-SC ### VPC-SC
As is often the case in real-world configurations, [VPC-SC](https://cloud.google.com/vpc-service-controls) is needed to mitigate data exfiltration. VPC-SC can be configured from the [FAST security stage](../../02-security). This step is optional, but highly recomended, and depends on customer policies and security best practices. As is often the case in real-world configurations, [VPC-SC](https://cloud.google.com/vpc-service-controls) is needed to mitigate data exfiltration. VPC-SC can be configured from the [FAST security stage](../../02-security). This step is optional, but highly recomended, and depends on customer policies and security best practices.
@ -116,6 +129,12 @@ terraform init
terraform apply terraform apply
``` ```
## Demo pipeline
The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features.
You can find examples in the `[demo](../../../../examples/data-solutions/data-platform-foundations/demo)` folder.
<!-- TFDOC OPTS files:1 show_extra:1 --> <!-- TFDOC OPTS files:1 show_extra:1 -->
<!-- BEGIN TFDOC --> <!-- BEGIN TFDOC -->
@ -132,20 +151,21 @@ terraform apply
| name | description | type | required | default | producer | | name | description | type | required | default | producer |
|---|---|:---:|:---:|:---:|:---:| |---|---|:---:|:---:|:---:|:---:|
| [billing_account](variables.tf#L17) | Billing account id and organization id ('nnnnnnnn' or null). | <code title="object&#40;&#123;&#10; id &#61; string&#10; organization_id &#61; number&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>00-globals</code> | | [billing_account](variables.tf#L17) | Billing account id and organization id ('nnnnnnnn' or null). | <code title="object&#40;&#123;&#10; id &#61; string&#10; organization_id &#61; number&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>00-globals</code> |
| [folder_ids](variables.tf#L45) | Folder to be used for the networking resources in folders/nnnn format. | <code title="object&#40;&#123;&#10; data-platform &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>01-resman</code> | | [folder_ids](variables.tf#L56) | Folder to be used for the networking resources in folders/nnnn format. | <code title="object&#40;&#123;&#10; data-platform &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>01-resman</code> |
| [host_project_ids](variables.tf#L63) | Shared VPC project ids. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>02-networking</code> | | [host_project_ids](variables.tf#L74) | Shared VPC project ids. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>02-networking</code> |
| [organization](variables.tf#L89) | Organization details. | <code title="object&#40;&#123;&#10; domain &#61; string&#10; id &#61; number&#10; customer_id &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>00-globals</code> | | [organization](variables.tf#L100) | Organization details. | <code title="object&#40;&#123;&#10; domain &#61; string&#10; id &#61; number&#10; customer_id &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | | <code>00-globals</code> |
| [prefix](variables.tf#L105) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | <code>string</code> | ✓ | | <code>00-globals</code> | | [prefix](variables.tf#L116) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | <code>string</code> | ✓ | | <code>00-globals</code> |
| [composer_config](variables.tf#L26) | | <code title="object&#40;&#123;&#10; node_count &#61; number&#10; airflow_version &#61; string&#10; env_variables &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; node_count &#61; 3&#10; airflow_version &#61; &#34;composer-1.17.5-airflow-2.1.4&#34;&#10; env_variables &#61; &#123;&#125;&#10;&#125;">&#123;&#8230;&#125;</code> | | | [composer_config](variables.tf#L26) | | <code title="object&#40;&#123;&#10; node_count &#61; number&#10; airflow_version &#61; string&#10; env_variables &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; node_count &#61; 3&#10; airflow_version &#61; &#34;composer-1.17.5-airflow-2.1.4&#34;&#10; env_variables &#61; &#123;&#125;&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [data_force_destroy](variables.tf#L39) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | <code>bool</code> | | <code>false</code> | | | [data_catalog_tags](variables.tf#L39) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map&#40;map&#40;list&#40;string&#41;&#41;&#41;</code> | | <code title="&#123;&#10; &#34;3_Confidential&#34; &#61; null&#10; &#34;2_Private&#34; &#61; null&#10; &#34;1_Sensitive&#34; &#61; null&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [groups](variables.tf#L53) | Groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> | | | [data_force_destroy](variables.tf#L50) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | <code>bool</code> | | <code>false</code> | |
| [network_config_composer](variables.tf#L71) | Network configurations to use for Composer. | <code title="object&#40;&#123;&#10; cloudsql_range &#61; string&#10; gke_master_range &#61; string&#10; gke_pods_name &#61; string&#10; gke_services_name &#61; string&#10; web_server_range &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; cloudsql_range &#61; &#34;192.168.254.0&#47;24&#34;&#10; gke_master_range &#61; &#34;192.168.255.0&#47;28&#34;&#10; gke_pods_name &#61; &#34;pods&#34;&#10; gke_services_name &#61; &#34;services&#34;&#10; web_server_range &#61; &#34;192.168.255.16&#47;28&#34;&#10;&#125;">&#123;&#8230;&#125;</code> | | | [groups](variables.tf#L64) | Groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [outputs_location](variables.tf#L99) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | <code>string</code> | | <code>null</code> | | | [network_config_composer](variables.tf#L82) | Network configurations to use for Composer. | <code title="object&#40;&#123;&#10; cloudsql_range &#61; string&#10; gke_master_range &#61; string&#10; gke_pods_name &#61; string&#10; gke_services_name &#61; string&#10; web_server_range &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; cloudsql_range &#61; &#34;192.168.254.0&#47;24&#34;&#10; gke_master_range &#61; &#34;192.168.255.0&#47;28&#34;&#10; gke_pods_name &#61; &#34;pods&#34;&#10; gke_services_name &#61; &#34;services&#34;&#10; web_server_range &#61; &#34;192.168.255.16&#47;28&#34;&#10;&#125;">&#123;&#8230;&#125;</code> | |
| [project_services](variables.tf#L111) | List of core services enabled on all projects. | <code>list&#40;string&#41;</code> | | <code title="&#91;&#10; &#34;cloudresourcemanager.googleapis.com&#34;,&#10; &#34;iam.googleapis.com&#34;,&#10; &#34;serviceusage.googleapis.com&#34;,&#10; &#34;stackdriver.googleapis.com&#34;&#10;&#93;">&#91;&#8230;&#93;</code> | | | [outputs_location](variables.tf#L110) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | <code>string</code> | | <code>null</code> | |
| [region](variables.tf#L122) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> | | | [project_services](variables.tf#L122) | List of core services enabled on all projects. | <code>list&#40;string&#41;</code> | | <code title="&#91;&#10; &#34;cloudresourcemanager.googleapis.com&#34;,&#10; &#34;iam.googleapis.com&#34;,&#10; &#34;serviceusage.googleapis.com&#34;,&#10; &#34;stackdriver.googleapis.com&#34;&#10;&#93;">&#91;&#8230;&#93;</code> | |
| [service_encryption_keys](variables.tf#L128) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object&#40;&#123;&#10; bq &#61; string&#10; composer &#61; string&#10; dataflow &#61; string&#10; storage &#61; string&#10; pubsub &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | | | [region](variables.tf#L133) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> | |
| [subnet_self_links](variables.tf#L140) | Shared VPC subnet self links. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | <code>02-networking</code> | | [service_encryption_keys](variables.tf#L139) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object&#40;&#123;&#10; bq &#61; string&#10; composer &#61; string&#10; dataflow &#61; string&#10; storage &#61; string&#10; pubsub &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | |
| [vpc_self_links](variables.tf#L149) | Shared VPC self links. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | <code>02-networking</code> | | [subnet_self_links](variables.tf#L151) | Shared VPC subnet self links. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; map&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | <code>02-networking</code> |
| [vpc_self_links](variables.tf#L160) | Shared VPC self links. | <code title="object&#40;&#123;&#10; dev-spoke-0 &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> | <code>02-networking</code> |
## Outputs ## Outputs

View File

@ -21,6 +21,7 @@ module "data-platform" {
billing_account_id = var.billing_account.id billing_account_id = var.billing_account.id
composer_config = var.composer_config composer_config = var.composer_config
data_force_destroy = var.data_force_destroy data_force_destroy = var.data_force_destroy
data_catalog_tags = var.data_catalog_tags
folder_id = var.folder_ids.data-platform folder_id = var.folder_ids.data-platform
groups = var.groups groups = var.groups
network_config = { network_config = {

View File

@ -36,6 +36,17 @@ variable "composer_config" {
} }
} }
variable "data_catalog_tags" {
description = "List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format."
type = map(map(list(string)))
nullable = false
default = {
"3_Confidential" = null
"2_Private" = null
"1_Sensitive" = null
}
}
variable "data_force_destroy" { variable "data_force_destroy" {
description = "Flag to set 'force_destroy' on data services like BigQery or Cloud Storage." description = "Flag to set 'force_destroy' on data services like BigQery or Cloud Storage."
type = bool type = bool

View File

@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture')
def test_resources(e2e_plan_runner): def test_resources(e2e_plan_runner):
"Test that plan works and the numbers of resources is as expected." "Test that plan works and the numbers of resources is as expected."
modules, resources = e2e_plan_runner(FIXTURES_DIR) modules, resources = e2e_plan_runner(FIXTURES_DIR)
assert len(modules) == 40 assert len(modules) == 41
assert len(resources) == 296 assert len(resources) == 313

View File

@ -65,10 +65,22 @@ def get_bindings(resources, prefix=None, folders=None):
member_type, _, member_id = member.partition(':') member_type, _, member_id = member.partition(':')
if member_type == 'user': if member_type == 'user':
continue continue
member_id = member_id.rpartition('@')[0] member_id, member_domain = member_id.split('@', 1)
# Handle Cloud Services Service Account
if member_domain == 'cloudservices.gserviceaccount.com':
member_id = "PROJECT_CLOUD_SERVICES"
# Handle Cloud Service Identity Service Acocunt
if re.match("^service-\d{8}", member_id):
member_id = "SERVICE_IDENTITY_" + member_domain.split(".", 1)[0]
# Handle BQ Cloud Service Identity Service Acocunt
if re.match("^bq-\d{8}", member_id):
member_id = "IDENTITY_" + member_domain.split(".", 1)[0]
resource_type_output = "Service Identity - " + resource_type
else:
resource_type_output = resource_type
if prefix and member_id.startswith(prefix): if prefix and member_id.startswith(prefix):
member_id = member_id[len(prefix) + 1:] member_id = member_id[len(prefix) + 1:]
yield Binding(authoritative, resource_type, resource_id, role, yield Binding(authoritative, resource_type_output, resource_id, role,
member_type, member_id, conditions) member_type, member_id, conditions)