diff --git a/examples/data-solutions/data-platform-foundations/03-composer.tf b/examples/data-solutions/data-platform-foundations/03-composer.tf
index 231d0cc5..fac47ec5 100644
--- a/examples/data-solutions/data-platform-foundations/03-composer.tf
+++ b/examples/data-solutions/data-platform-foundations/03-composer.tf
@@ -67,6 +67,7 @@ resource "google_composer_environment" "orch-cmp-0" {
env_variables = merge(
var.composer_config.env_variables, {
BQ_LOCATION = var.location
+ DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}")
DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "")
DTL_L0_PRJ = module.lake-0-project.project_id
DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id
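The new `DATA_CAT_TAGS` entry exposes the Data Catalog policy tags to Composer DAGs as a JSON-encoded map of display name to policy tag resource name. A minimal sketch of how a DAG consumes it (the example value and ids are hypothetical):

```python
import json
import os

# jsonencode(module.common-datacatalog.tags) yields something like (hypothetical ids):
# {"2_Private": "projects/cmn-prj/locations/eu/taxonomies/123/policyTags/456"}
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS", "{}"))

# Look up a policy tag resource name by display name; None when the tag is absent.
private_tag = DATA_CAT_TAGS.get("2_Private")
```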
diff --git a/examples/data-solutions/data-platform-foundations/05-datalake.tf b/examples/data-solutions/data-platform-foundations/05-datalake.tf
index 64ec1b24..b163f9e5 100644
--- a/examples/data-solutions/data-platform-foundations/05-datalake.tf
+++ b/examples/data-solutions/data-platform-foundations/05-datalake.tf
@@ -23,6 +23,7 @@ locals {
(local.groups.data-analysts) = [
"roles/bigquery.dataViewer",
"roles/bigquery.jobUser",
+ "roles/bigquery.metadataViewer",
"roles/bigquery.user",
"roles/datacatalog.viewer",
"roles/datacatalog.tagTemplateViewer",
@@ -37,6 +38,7 @@ locals {
(local.groups.data-analysts) = [
"roles/bigquery.dataEditor",
"roles/bigquery.jobUser",
+ "roles/bigquery.metadataViewer",
"roles/bigquery.user",
"roles/datacatalog.viewer",
"roles/datacatalog.tagTemplateViewer",
@@ -44,7 +46,7 @@ locals {
]
}
lake_0_iam = {
- "roles/bigquery.dataEditor" = [
+ "roles/bigquery.dataOwner" = [
module.load-sa-df-0.iam_email,
module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email,
@@ -52,18 +54,24 @@ locals {
"roles/bigquery.jobUser" = [
module.load-sa-df-0.iam_email,
]
+ "roles/datacatalog.categoryAdmin" = [
+ module.transf-sa-bq-0.iam_email
+ ]
"roles/storage.objectCreator" = [
module.load-sa-df-0.iam_email,
]
}
lake_iam = {
- "roles/bigquery.dataEditor" = [
+ "roles/bigquery.dataOwner" = [
module.transf-sa-df-0.iam_email,
module.transf-sa-bq-0.iam_email,
]
"roles/bigquery.jobUser" = [
module.transf-sa-bq-0.iam_email,
]
+ "roles/datacatalog.categoryAdmin" = [
+ module.load-sa-df-0.iam_email
+ ]
"roles/storage.objectCreator" = [
module.transf-sa-df-0.iam_email,
]
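The `dataEditor` to `dataOwner` switch is what lets the transformation service accounts attach policy tags when they patch table schemas: `bigquery.tables.setCategory` is granted by `roles/bigquery.dataOwner` but not by `roles/bigquery.dataEditor`, while managing the tags themselves is covered by `datacatalog.categoryAdmin`. A client-side sketch of the same schema patch the DAGs perform, assuming the `google-cloud-bigquery` library and hypothetical names:

```python
from google.cloud import bigquery

PRIVATE_TAG = "projects/cmn-prj/locations/eu/taxonomies/123/policyTags/456"  # hypothetical

client = bigquery.Client(project="dtl-0-prj")            # hypothetical lake project
table = client.get_table("dtl-0-prj.lake_0.customers")   # hypothetical table

# Rebuild the schema, attaching the policy tag to the PII columns; setting
# policy tags on columns is the operation that requires bigquery.dataOwner.
schema = []
for field in table.schema:
    if field.name in ("name", "surname"):
        field = bigquery.SchemaField(
            field.name, field.field_type, mode=field.mode,
            description=field.description,
            policy_tags=bigquery.PolicyTagList(names=[PRIVATE_TAG]),
        )
    schema.append(field)
table.schema = schema
client.update_table(table, ["schema"])
```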
diff --git a/examples/data-solutions/data-platform-foundations/06-common.tf b/examples/data-solutions/data-platform-foundations/06-common.tf
index cc18a46f..80451500 100644
--- a/examples/data-solutions/data-platform-foundations/06-common.tf
+++ b/examples/data-solutions/data-platform-foundations/06-common.tf
@@ -21,6 +21,9 @@ module "common-project" {
prefix = var.prefix
name = "cmn${local.project_suffix}"
group_iam = {
+ (local.groups.data-analysts) = [
+ "roles/datacatalog.viewer",
+ ]
(local.groups.data-engineers) = [
"roles/dlp.reader",
"roles/dlp.user",
@@ -28,6 +31,7 @@ module "common-project" {
]
(local.groups.data-security) = [
"roles/dlp.admin",
+ "roles/datacatalog.admin"
]
}
iam = {
@@ -35,6 +39,17 @@ module "common-project" {
module.load-sa-df-0.iam_email,
module.transf-sa-df-0.iam_email
]
+ "roles/datacatalog.viewer" = [
+ module.load-sa-df-0.iam_email,
+ module.transf-sa-df-0.iam_email,
+ module.transf-sa-bq-0.iam_email
+ ]
+ "roles/datacatalog.categoryFineGrainedReader" = [
+ module.transf-sa-df-0.iam_email,
+ module.transf-sa-bq-0.iam_email,
+      # Uncomment to grant the `data-analysts` group access to all tagged columns.
+ # local.groups_iam.data-analysts
+ ]
}
services = concat(var.project_services, [
"datacatalog.googleapis.com",
@@ -42,6 +57,16 @@ module "common-project" {
])
}
+# Data Catalog policy tags
+
+module "common-datacatalog" {
+ source = "../../../modules/data-catalog-policy-tag"
+ project_id = module.common-project.project_id
+ name = "${var.prefix}-datacatalog-policy-tags"
+ location = var.location
+ tags = var.data_catalog_tags
+}
+
# To create KMS keys in the common project: uncomment this section and assign key links accordingly in the local.service_encryption_keys variable
# module "cmn-kms-0" {
diff --git a/examples/data-solutions/data-platform-foundations/IAM.md b/examples/data-solutions/data-platform-foundations/IAM.md
index aed1c405..d6ccbecb 100644
--- a/examples/data-solutions/data-platform-foundations/IAM.md
+++ b/examples/data-solutions/data-platform-foundations/IAM.md
@@ -6,45 +6,53 @@ Legend: + additive, • conditional.
| members | roles |
|---|---|
+|gcp-data-analysts<br>group|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|gcp-data-engineers<br>group|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin)<br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader)<br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
-|gcp-data-security<br>group|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
-|load-df-0<br>serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
-|trf-df-0<br>serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
+|gcp-data-security<br>group|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin)<br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
+|load-df-0<br>serviceAccount|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
+|trf-bq-0<br>serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
+|trf-df-0<br>serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
## Project dtl-0
| members | roles |
|---|---|
-|gcp-data-analysts<br>group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)<br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|gcp-data-analysts<br>group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)<br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)<br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|gcp-data-engineers<br>group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
-|load-df-0<br>serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
-|trf-bq-0<br>serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
-|trf-df-0<br>serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
+|SERVICE_IDENTITY_service-networking<br>serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +|
+|load-df-0<br>serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
+|trf-bq-0<br>serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)<br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
+|trf-df-0<br>serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
## Project dtl-1
| members | roles |
|---|---|
-|gcp-data-analysts<br>group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)<br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|gcp-data-analysts<br>group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)<br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)<br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|gcp-data-engineers<br>group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
-|trf-bq-0<br>serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
-|trf-df-0<br>serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|SERVICE_IDENTITY_service-networking<br>serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +|
+|load-df-0<br>serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
+|trf-bq-0<br>serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
+|trf-df-0<br>serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)<br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project dtl-2
| members | roles |
|---|---|
-|gcp-data-analysts<br>group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)<br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|gcp-data-analysts<br>group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)<br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)<br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|gcp-data-engineers<br>group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
-|trf-bq-0<br>serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
-|trf-df-0<br>serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|SERVICE_IDENTITY_service-networking<br>serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +|
+|load-df-0<br>serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
+|trf-bq-0<br>serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
+|trf-df-0<br>serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)<br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project dtl-plg
| members | roles |
|---|---|
-|gcp-data-analysts<br>group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)<br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|gcp-data-analysts<br>group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)<br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)<br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)<br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)<br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|gcp-data-engineers<br>group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
+|SERVICE_IDENTITY_service-networking<br>serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +|
## Project lnd
@@ -62,6 +70,8 @@ Legend: + additive, • conditional.
| members | roles |
|---|---|
|gcp-data-engineers<br>group|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer)<br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)<br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer)<br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
+|SERVICE_IDENTITY_dataflow-service-producer-prod<br>serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|SERVICE_IDENTITY_service-networking<br>serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +|
|load-df-0<br>serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)<br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)<br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|orc-cmp-0<br>serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
@@ -69,7 +79,9 @@ Legend: + additive, • conditional.
| members | roles |
|---|---|
-|gcp-data-engineers<br>group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor)<br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)<br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)<br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser)<br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)<br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)<br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)<br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|gcp-data-engineers<br>group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor)<br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)<br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)<br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)<br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)<br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)<br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|SERVICE_IDENTITY_cloudcomposer-accounts<br>serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|SERVICE_IDENTITY_service-networking<br>serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +|
|load-df-0<br>serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)<br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|orc-cmp-0<br>serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker)<br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)<br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|trf-df-0<br>serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
@@ -79,6 +91,8 @@ Legend: + additive, • conditional.
| members | roles |
|---|---|
|gcp-data-engineers<br>group|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)<br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
+|SERVICE_IDENTITY_dataflow-service-producer-prod<br>serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|SERVICE_IDENTITY_service-networking<br>serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +|
|orc-cmp-0<br>serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|trf-bq-0<br>serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|trf-df-0<br>serviceAccount|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)<br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
diff --git a/examples/data-solutions/data-platform-foundations/README.md b/examples/data-solutions/data-platform-foundations/README.md
index 8b243251..09f8e63a 100644
--- a/examples/data-solutions/data-platform-foundations/README.md
+++ b/examples/data-solutions/data-platform-foundations/README.md
@@ -154,6 +154,19 @@ Cloud Data Loss Prevention resources and templates should be stored in the secur
You can find more details and best practices on using DLP for de-identification and re-identification of PII in large-scale datasets in the [GCP documentation](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp).
+## Data Catalog
+
+[Data Catalog](https://cloud.google.com/data-catalog) helps you document your data entries at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag templates](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest using `Tags` and `Tag templates`.
+
+The default configuration implements 3 tags:
+ - `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers.
+ - `2_Private`: policy tag for columns that include sensitive personally identifiable information (PII), such as a person's first name.
+ - `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as a customer's credit limit.
+
+Anything that is not tagged is available to all users who have access to the data warehouse.
+
+For the purpose of this example, no group has access to tagged data. You can configure your tags and the roles associated with them through the `data_catalog_tags` variable. We suggest using the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide when designing your tag structure and access patterns.
+
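To see the effect, query a tagged column as a member without `datacatalog.categoryFineGrainedReader` on the tag: the request is rejected, while untagged columns stay readable. A hedged sketch with hypothetical project and dataset names:

```python
from google.api_core.exceptions import Forbidden
from google.cloud import bigquery

client = bigquery.Client(project="dtl-2-prj")  # hypothetical L2 project

try:
    # name and surname carry the 2_Private policy tag in the demo pipelines.
    rows = client.query(
        "SELECT name, surname FROM `dtl-2-prj.lake_2.customer_purchase` LIMIT 5"
    ).result()
    print(list(rows))
except Forbidden as exc:
    print("Denied by column-level security:", exc)
```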
## How to run this script
To deploy this example on your GCP organization, you will need
@@ -207,17 +220,10 @@ To do this, you need to remove the IAM binding at project level for the `data-analys
## Demo pipeline
-The application layer is out of scope of this script, but as a demo, it is provided with a Cloud Composer DAG to mode data from the `landing` area to the `DataLake L2` dataset.
+The application layer is out of scope for this script. For demo purposes only, several Cloud Composer DAGs are provided. The demos import data from the `landing` area to the `DataLake L2` dataset using different features.
-Just follow the commands you find in the `demo_commands` Terraform output, go in the Cloud Composer UI and run the `data_pipeline_dag`.
+You can find the examples in the [demo](./demo) folder.
-Description of commands:
-
-- 01: copy sample data to a `landing` Cloud Storage bucket impersonating the `load` service account.
-- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
-- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account.
-- 04: Open the Cloud Composer Airflow UI and run the imported DAG.
-- 05: Run the BigQuery query to see results.
## Variables
@@ -225,17 +231,18 @@ Description of commands:
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| [billing_account_id](variables.tf#L17) | Billing account id. | string | ✓ |  |
-| [folder_id](variables.tf#L42) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ |  |
-| [organization_domain](variables.tf#L87) | Organization domain. | string | ✓ |  |
-| [prefix](variables.tf#L92) | Unique prefix used for resource names. | string | ✓ |  |
+| [folder_id](variables.tf#L53) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ |  |
+| [organization_domain](variables.tf#L98) | Organization domain. | string | ✓ |  |
+| [prefix](variables.tf#L103) | Unique prefix used for resource names. | string | ✓ |  |
| [composer_config](variables.tf#L22) | Cloud Composer config. | object({…}) |  | {…} |
-| [data_force_destroy](variables.tf#L36) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool |  | false |
-| [groups](variables.tf#L53) | User groups. | map(string) |  | {…} |
-| [location](variables.tf#L47) | Location used for multi-regional resources. | string |  | "eu" |
-| [network_config](variables.tf#L63) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) |  | null |
-| [project_services](variables.tf#L97) | List of core services enabled on all projects. | list(string) |  | […] |
-| [project_suffix](variables.tf#L108) | Suffix used only for project ids. | string |  | null |
-| [region](variables.tf#L114) | Region used for regional resources. | string |  | "europe-west1" |
+| [data_catalog_tags](variables.tf#L36) | List of Data Catalog policy tags to be created, with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) |  | {…} |
+| [data_force_destroy](variables.tf#L47) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | bool |  | false |
+| [groups](variables.tf#L64) | User groups. | map(string) |  | {…} |
+| [location](variables.tf#L58) | Location used for multi-regional resources. | string |  | "eu" |
+| [network_config](variables.tf#L74) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) |  | null |
+| [project_services](variables.tf#L108) | List of core services enabled on all projects. | list(string) |  | […] |
+| [project_suffix](variables.tf#L119) | Suffix used only for project ids. | string |  | null |
+| [region](variables.tf#L125) | Region used for regional resources. | string |  | "europe-west1" |
## Outputs
@@ -254,13 +261,6 @@ Description of commands:
Features to add in future releases:
-- Add support for Column level access on BigQuery
-- Add example templates for Data Catalog
- Add example on how to use Cloud Data Loss Prevention
- Add solution to handle Tables, Views, and Authorized Views lifecycle
- Add solution to handle Metadata lifecycle
-
-## To Test/Fix
-
-- Composer require "Require OS Login" not enforced
-- External Shared-VPC
diff --git a/examples/data-solutions/data-platform-foundations/demo/README.md b/examples/data-solutions/data-platform-foundations/demo/README.md
index 78297f7a..5347b2cf 100644
--- a/examples/data-solutions/data-platform-foundations/demo/README.md
+++ b/examples/data-solutions/data-platform-foundations/demo/README.md
@@ -1,3 +1,32 @@
# Data ingestion Demo
-In this folder you can find an example to ingest data on the `data platfoem` instantiated in [here](../). See details in the [README.m](../#demo-pipeline) to run the demo.
\ No newline at end of file
+In this folder, you can find an example of how to ingest data on the `data platform` instantiated [here](../).
+
+The example is not intended to be production-ready code.
+
+## Demo use case
+The demo imports purchase data generated by a store.
+
+## Input files
+Data is uploaded to the `landing` GCS bucket. File structure (hypothetical sample rows are sketched below):
+ - `customers.csv`: comma-separated values with customer information in the following format: Customer ID, Name, Surname, Registration Timestamp
+ - `purchases.csv`: comma-separated values with purchase information in the following format: Item ID, Customer ID, Item, Item price, Purchase Timestamp
+
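A couple of hypothetical rows matching these layouts, generated with the `csv` module (the demo ships its own sample files):

```python
import csv

# Hypothetical rows, for illustration only.
with open("customers.csv", "w", newline="") as f:
    csv.writer(f).writerows([
        [1, "Mary", "Major", "2022-01-02T10:00:00"],
        [2, "John", "Doe", "2022-01-03T11:30:00"],
    ])

with open("purchases.csv", "w", newline="") as f:
    csv.writer(f).writerows([
        [10, 1, "bike", 150.0, "2022-01-04T09:15:00"],
        [11, 2, "helmet", 35.5, "2022-01-04T09:20:00"],
    ])
```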
+## Data processing pipelines
+Different data pipelines are provided to highlight different features and patterns. For the purpose of this example, a single pipeline handles the whole data lifecycle. When adapting the examples to your real use case, you may want to evaluate handling each functional step in a separate pipeline or with a dedicated tool. For example, you may want to use `Dataform` to handle the data schema lifecycle.
+
+Below you can find a description of each example:
+ - Simple data import: [`datapipeline.py`](./datapipeline.py) is a simple pipeline that imports the provided data from the `landing` Google Cloud Storage bucket to the Data Hub L2 layer, joining the `customers` and `purchases` tables into the `customer_purchase` table.
+ - Data import with policy tags: [`datapipeline_dc_tags.py`](./datapipeline_dc_tags.py) imports the provided data from the `landing` bucket to the Data Hub L2 layer, protecting sensitive data with Data Catalog policy tags.
+ - Table deletion: [`delete_table.py`](./delete_table.py) deletes the BigQuery tables created by the import pipelines.
+
+## Running the demo
+To run the demo examples, follow these steps:
+
+- 01: Copy the sample data to the `landing` Cloud Storage bucket impersonating the `load` service account.
+- 02: Copy the sample data structure definitions to the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
+- 03: Copy the Cloud Composer DAGs to the Cloud Composer storage bucket impersonating the `orchestration` service account.
+- 04: Open the Cloud Composer Airflow UI and run the imported DAGs.
+- 05: Run the BigQuery query to see the results.
+
+You can find pre-computed commands in the `demo_commands` output of the deployed Terraform [data platform](../).
diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py
index fd633ebd..1f748c08 100644
--- a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py
+++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py
@@ -19,18 +19,21 @@
import csv
import datetime
import io
+import json
import logging
import os
from airflow import models
-from airflow.contrib.operators.dataflow_operator import DataflowTemplateOperator
+from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
from airflow.operators import dummy
-from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator
+from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator
+from airflow.utils.task_group import TaskGroup
# --------------------------------------------------------------------------------
-# Set variables
-# ------------------------------------------------------------
+# Set variables - Needed for the DEMO
+# --------------------------------------------------------------------------------
BQ_LOCATION = os.environ.get("BQ_LOCATION")
+DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS", "{}"))
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
@@ -84,7 +87,6 @@ default_args = {
'retries': 1,
'retry_delay': datetime.timedelta(minutes=5),
'dataflow_default_options': {
- 'project': LOD_PRJ,
'location': DF_REGION,
'zone': DF_ZONE,
'stagingLocation': LOD_GCS_STAGING,
@@ -114,9 +116,13 @@ with models.DAG(
trigger_rule='all_success'
)
- customers_import = DataflowTemplateOperator(
- task_id="dataflow_customer_import",
+ # BigQuery tables are created automatically here for demo purposes.
+ # Consider a dedicated pipeline or tool for a real-life scenario.
+ customers_import = DataflowTemplatedJobStartOperator(
+ task_id="dataflow_customers_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
+ project_id=LOD_PRJ,
+ location=DF_REGION,
parameters={
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/customers_schema.json",
@@ -127,9 +133,11 @@ with models.DAG(
},
)
- purchases_import = DataflowTemplateOperator(
+ purchases_import = DataflowTemplatedJobStartOperator(
task_id="dataflow_purchases_import",
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
+ project_id=LOD_PRJ,
+ location=DF_REGION,
parameters={
"javascriptTextTransformFunctionName": "transform",
"JSONPath": ORC_GCS + "/purchases_schema.json",
@@ -180,13 +188,13 @@ with models.DAG(
'jobType':'QUERY',
'query':{
'query':"""SELECT
- customer_id,
- purchase_id,
- name,
- surname,
- item,
- price,
- timestamp
+ customer_id,
+ purchase_id,
+ name,
+ surname,
+ item,
+ price,
+ timestamp
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
'destinationTable':{
@@ -201,4 +209,4 @@ with models.DAG(
impersonation_chain=[TRF_SA_BQ]
)
- start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end
+ start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end
\ No newline at end of file
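The DAG now uses the provider-package `DataflowTemplatedJobStartOperator` instead of the removed contrib `DataflowTemplateOperator`, passing `project_id` and `location` explicitly rather than a `project` key in `dataflow_default_options`. A self-contained sketch of the pattern, with hypothetical project and bucket values:

```python
import datetime

from airflow import models
from airflow.providers.google.cloud.operators.dataflow import (
    DataflowTemplatedJobStartOperator,
)

with models.DAG(
        'dataflow_template_demo',  # hypothetical DAG, for illustration only
        start_date=datetime.datetime(2022, 1, 1),
        schedule_interval=None) as dag:
    wordcount = DataflowTemplatedJobStartOperator(
        task_id='wordcount_example',
        template='gs://dataflow-templates/latest/Word_Count',
        project_id='lod-prj',      # hypothetical load project
        location='europe-west1',
        parameters={
            'inputFile': 'gs://dataflow-samples/shakespeare/kinglear.txt',
            'output': 'gs://my-bucket/wordcount/out',  # hypothetical bucket
        },
    )
```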
diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py
new file mode 100644
index 00000000..2fb88c9e
--- /dev/null
+++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py
@@ -0,0 +1,322 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# --------------------------------------------------------------------------------
+# Load The Dependencies
+# --------------------------------------------------------------------------------
+
+import csv
+import datetime
+import io
+import json
+import logging
+import os
+
+from airflow import models
+from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
+from airflow.operators import dummy
+from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator
+from airflow.utils.task_group import TaskGroup
+
+# --------------------------------------------------------------------------------
+# Set variables - Needed for the DEMO
+# --------------------------------------------------------------------------------
+BQ_LOCATION = os.environ.get("BQ_LOCATION")
+DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS", "{}"))
+DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
+DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
+DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
+DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
+DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
+DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
+DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
+DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
+DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
+DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
+DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
+DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
+GCP_REGION = os.environ.get("GCP_REGION")
+LND_PRJ = os.environ.get("LND_PRJ")
+LND_BQ = os.environ.get("LND_BQ")
+LND_GCS = os.environ.get("LND_GCS")
+LND_PS = os.environ.get("LND_PS")
+LOD_PRJ = os.environ.get("LOD_PRJ")
+LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
+LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
+LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET")
+LOD_SA_DF = os.environ.get("LOD_SA_DF")
+ORC_PRJ = os.environ.get("ORC_PRJ")
+ORC_GCS = os.environ.get("ORC_GCS")
+TRF_PRJ = os.environ.get("TRF_PRJ")
+TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING")
+TRF_NET_VPC = os.environ.get("TRF_NET_VPC")
+TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET")
+TRF_SA_DF = os.environ.get("TRF_SA_DF")
+TRF_SA_BQ = os.environ.get("TRF_SA_BQ")
+DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "")
+DF_REGION = os.environ.get("GCP_REGION")
+DF_ZONE = os.environ.get("GCP_REGION") + "-b"
+
+# --------------------------------------------------------------------------------
+# Set default arguments
+# --------------------------------------------------------------------------------
+
+# If you are running Airflow in more than one time zone
+# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
+# for best practices
+yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
+
+default_args = {
+ 'owner': 'airflow',
+ 'start_date': yesterday,
+ 'depends_on_past': False,
+ 'email': [''],
+ 'email_on_failure': False,
+ 'email_on_retry': False,
+ 'retries': 1,
+ 'retry_delay': datetime.timedelta(minutes=5),
+ 'dataflow_default_options': {
+ 'location': DF_REGION,
+ 'zone': DF_ZONE,
+ 'stagingLocation': LOD_GCS_STAGING,
+ 'tempLocation': LOD_GCS_STAGING + "/tmp",
+ 'serviceAccountEmail': LOD_SA_DF,
+ 'subnetwork': LOD_NET_SUBNET,
+ 'ipConfiguration': "WORKER_IP_PRIVATE",
+ 'kmsKeyName' : DF_KMS_KEY
+ },
+}
+
+# --------------------------------------------------------------------------------
+# Main DAG
+# --------------------------------------------------------------------------------
+
+with models.DAG(
+ 'data_pipeline_dc_tags_dag',
+ default_args=default_args,
+ schedule_interval=None) as dag:
+ start = dummy.DummyOperator(
+ task_id='start',
+ trigger_rule='all_success'
+ )
+
+ end = dummy.DummyOperator(
+ task_id='end',
+ trigger_rule='all_success'
+ )
+
+ # BigQuery tables are created here for demo purposes.
+ # Consider a dedicated pipeline or tool for a real-life scenario.
+ with TaskGroup('upsert_table') as upsert_table:
+ upsert_table_customers = BigQueryUpsertTableOperator(
+ task_id="upsert_table_customers",
+ project_id=DTL_L0_PRJ,
+ dataset_id=DTL_L0_BQ_DATASET,
+ impersonation_chain=[TRF_SA_DF],
+ table_resource={
+ "tableReference": {"tableId": "customers"},
+ },
+ )
+
+ upsert_table_purchases = BigQueryUpsertTableOperator(
+ task_id="upsert_table_purchases",
+ project_id=DTL_L0_PRJ,
+ dataset_id=DTL_L0_BQ_DATASET,
+ impersonation_chain=[TRF_SA_BQ],
+ table_resource={
+ "tableReference": {"tableId": "purchases"}
+ },
+ )
+
+ upsert_table_customer_purchase_l1 = BigQueryUpsertTableOperator(
+ task_id="upsert_table_customer_purchase_l1",
+ project_id=DTL_L1_PRJ,
+ dataset_id=DTL_L1_BQ_DATASET,
+ impersonation_chain=[TRF_SA_BQ],
+ table_resource={
+ "tableReference": {"tableId": "customer_purchase"}
+ },
+ )
+
+ upsert_table_customer_purchase_l2 = BigQueryUpsertTableOperator(
+ task_id="upsert_table_customer_purchase_l2",
+ project_id=DTL_L2_PRJ,
+ dataset_id=DTL_L2_BQ_DATASET,
+ impersonation_chain=[TRF_SA_BQ],
+ table_resource={
+ "tableReference": {"tableId": "customer_purchase"}
+ },
+ )
+
+ # BigQuery table schemas are defined here for demo purposes.
+ # Consider a dedicated pipeline or tool for a real-life scenario.
+ with TaskGroup('update_schema_table') as update_schema_table:
+ update_table_schema_customers = BigQueryUpdateTableSchemaOperator(
+ task_id="update_table_schema_customers",
+ project_id=DTL_L0_PRJ,
+ dataset_id=DTL_L0_BQ_DATASET,
+ table_id="customers",
+ impersonation_chain=[TRF_SA_BQ],
+ include_policy_tags=True,
+ schema_fields_updates=[
+ { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" },
+ { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
+ { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
+ { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
+ ]
+ )
+
+ update_table_schema_purchases = BigQueryUpdateTableSchemaOperator(
+ task_id="update_table_schema_purchases",
+ project_id=DTL_L0_PRJ,
+ dataset_id=DTL_L0_BQ_DATASET,
+ table_id="purchases",
+ impersonation_chain=[TRF_SA_BQ],
+ include_policy_tags=True,
+ schema_fields_updates=[
+ { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" },
+ { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
+ { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
+ { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
+ { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
+ ]
+ )
+
+ update_table_schema_customer_purchase_l1 = BigQueryUpdateTableSchemaOperator(
+ task_id="update_table_schema_customer_purchase_l1",
+ project_id=DTL_L1_PRJ,
+ dataset_id=DTL_L1_BQ_DATASET,
+ table_id="customer_purchase",
+ impersonation_chain=[TRF_SA_BQ],
+ include_policy_tags=True,
+ schema_fields_updates=[
+ { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
+ { "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" },
+ { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
+ { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
+ { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
+ { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
+ { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
+ ]
+ )
+
+ update_table_schema_customer_purchase_l2 = BigQueryUpdateTableSchemaOperator(
+ task_id="update_table_schema_customer_purchase_l2",
+ project_id=DTL_L2_PRJ,
+ dataset_id=DTL_L2_BQ_DATASET,
+ table_id="customer_purchase",
+ impersonation_chain=[TRF_SA_BQ],
+ include_policy_tags=True,
+ schema_fields_updates=[
+ { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
+ { "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" },
+ { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
+ { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
+ { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
+ { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
+ { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
+ ]
+ )
+
+ customers_import = DataflowTemplatedJobStartOperator(
+ task_id="dataflow_customers_import",
+ template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
+ project_id=LOD_PRJ,
+ location=DF_REGION,
+ parameters={
+ "javascriptTextTransformFunctionName": "transform",
+ "JSONPath": ORC_GCS + "/customers_schema.json",
+ "javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js",
+ "inputFilePattern": LND_GCS + "/customers.csv",
+ "outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".customers",
+ "bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
+ },
+ )
+
+ purchases_import = DataflowTemplatedJobStartOperator(
+ task_id="dataflow_purchases_import",
+ template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
+ project_id=LOD_PRJ,
+ location=DF_REGION,
+ parameters={
+ "javascriptTextTransformFunctionName": "transform",
+ "JSONPath": ORC_GCS + "/purchases_schema.json",
+ "javascriptTextTransformGcsPath": ORC_GCS + "/purchases_udf.js",
+ "inputFilePattern": LND_GCS + "/purchases.csv",
+ "outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".purchases",
+ "bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
+ },
+ )
+
+ join_customer_purchase = BigQueryInsertJobOperator(
+ task_id='bq_join_customer_purchase',
+ gcp_conn_id='bigquery_default',
+ project_id=TRF_PRJ,
+ location=BQ_LOCATION,
+ configuration={
+ 'jobType':'QUERY',
+ 'query':{
+ 'query':"""SELECT
+ c.id as customer_id,
+ p.id as purchase_id,
+ c.name as name,
+ c.surname as surname,
+ p.item as item,
+ p.price as price,
+ p.timestamp as timestamp
+ FROM `{dtl_0_prj}.{dtl_0_dataset}.customers` c
+ JOIN `{dtl_0_prj}.{dtl_0_dataset}.purchases` p ON c.id = p.customer_id
+ """.format(dtl_0_prj=DTL_L0_PRJ, dtl_0_dataset=DTL_L0_BQ_DATASET, ),
+ 'destinationTable':{
+ 'projectId': DTL_L1_PRJ,
+ 'datasetId': DTL_L1_BQ_DATASET,
+ 'tableId': 'customer_purchase'
+ },
+ 'writeDisposition':'WRITE_TRUNCATE',
+ "useLegacySql": False
+ }
+ },
+ impersonation_chain=[TRF_SA_BQ]
+ )
+
+ l2_customer_purchase = BigQueryInsertJobOperator(
+ task_id='bq_l2_customer_purchase',
+ gcp_conn_id='bigquery_default',
+ project_id=TRF_PRJ,
+ location=BQ_LOCATION,
+ configuration={
+ 'jobType':'QUERY',
+ 'query':{
+ 'query':"""SELECT
+ customer_id,
+ purchase_id,
+ name,
+ surname,
+ item,
+ price,
+ timestamp
+ FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
+ """.format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
+ 'destinationTable':{
+ 'projectId': DTL_L2_PRJ,
+ 'datasetId': DTL_L2_BQ_DATASET,
+ 'tableId': 'customer_purchase'
+ },
+ 'writeDisposition':'WRITE_TRUNCATE',
+ "useLegacySql": False
+ }
+ },
+ impersonation_chain=[TRF_SA_BQ]
+ )
+ start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end
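The final chain relies on TaskGroup-level dependencies: every task in `upsert_table` completes before any task in `update_schema_table` starts, which guarantees tables and schemas exist before the Dataflow imports write to them. A minimal sketch of the chaining semantics:

```python
import datetime

from airflow import models
from airflow.operators import dummy
from airflow.utils.task_group import TaskGroup

with models.DAG(
        'taskgroup_demo',  # hypothetical DAG, for illustration only
        start_date=datetime.datetime(2022, 1, 1),
        schedule_interval=None) as dag:
    start = dummy.DummyOperator(task_id='start')
    end = dummy.DummyOperator(task_id='end')

    with TaskGroup('prepare') as prepare:
        t1 = dummy.DummyOperator(task_id='t1')
        t2 = dummy.DummyOperator(task_id='t2')

    # Chaining the group acts as a barrier: t1 and t2 run after start,
    # and both must succeed before end runs.
    start >> prepare >> end
```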
diff --git a/examples/data-solutions/data-platform-foundations/demo/delete_table.py b/examples/data-solutions/data-platform-foundations/demo/delete_table.py
new file mode 100644
index 00000000..a2585a68
--- /dev/null
+++ b/examples/data-solutions/data-platform-foundations/demo/delete_table.py
@@ -0,0 +1,146 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# --------------------------------------------------------------------------------
+# Load The Dependencies
+# --------------------------------------------------------------------------------
+
+import csv
+import datetime
+import io
+import json
+import logging
+import os
+
+from airflow import models
+from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
+from airflow.operators import dummy
+from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteTableOperator
+from airflow.utils.task_group import TaskGroup
+
+# --------------------------------------------------------------------------------
+# Set variables - Needed for the DEMO
+# --------------------------------------------------------------------------------
+BQ_LOCATION = os.environ.get("BQ_LOCATION")
+DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS", "{}"))
+DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
+DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
+DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
+DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
+DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
+DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
+DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
+DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
+DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
+DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
+DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
+DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
+GCP_REGION = os.environ.get("GCP_REGION")
+LND_PRJ = os.environ.get("LND_PRJ")
+LND_BQ = os.environ.get("LND_BQ")
+LND_GCS = os.environ.get("LND_GCS")
+LND_PS = os.environ.get("LND_PS")
+LOD_PRJ = os.environ.get("LOD_PRJ")
+LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
+LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
+LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET")
+LOD_SA_DF = os.environ.get("LOD_SA_DF")
+ORC_PRJ = os.environ.get("ORC_PRJ")
+ORC_GCS = os.environ.get("ORC_GCS")
+TRF_PRJ = os.environ.get("TRF_PRJ")
+TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING")
+TRF_NET_VPC = os.environ.get("TRF_NET_VPC")
+TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET")
+TRF_SA_DF = os.environ.get("TRF_SA_DF")
+TRF_SA_BQ = os.environ.get("TRF_SA_BQ")
+DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "")
+DF_REGION = os.environ.get("GCP_REGION")
+DF_ZONE = os.environ.get("GCP_REGION") + "-b"
+
+# --------------------------------------------------------------------------------
+# Set default arguments
+# --------------------------------------------------------------------------------
+
+# If you are running Airflow in more than one time zone
+# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
+# for best practices
+yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
+
+default_args = {
+ 'owner': 'airflow',
+ 'start_date': yesterday,
+ 'depends_on_past': False,
+ 'email': [''],
+ 'email_on_failure': False,
+ 'email_on_retry': False,
+ 'retries': 1,
+ 'retry_delay': datetime.timedelta(minutes=5),
+ 'dataflow_default_options': {
+ 'location': DF_REGION,
+ 'zone': DF_ZONE,
+ 'stagingLocation': LOD_GCS_STAGING,
+ 'tempLocation': LOD_GCS_STAGING + "/tmp",
+ 'serviceAccountEmail': LOD_SA_DF,
+ 'subnetwork': LOD_NET_SUBNET,
+ 'ipConfiguration': "WORKER_IP_PRIVATE",
+ 'kmsKeyName' : DF_KMS_KEY
+ },
+}
+
+# --------------------------------------------------------------------------------
+# Main DAG
+# --------------------------------------------------------------------------------
+
+with models.DAG(
+ 'delete_tables_dag',
+ default_args=default_args,
+ schedule_interval=None) as dag:
+ start = dummy.DummyOperator(
+ task_id='start',
+ trigger_rule='all_success'
+ )
+
+ end = dummy.DummyOperator(
+ task_id='end',
+ trigger_rule='all_success'
+ )
+
+ # BigQuery tables are deleted here for demo purposes.
+ # Consider a dedicated pipeline or tool for a real-life scenario.
+ with TaskGroup('delete_table') as delete_table:
+ delete_table_customers = BigQueryDeleteTableOperator(
+ task_id="delete_table_customers",
+ deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".customers",
+ impersonation_chain=[TRF_SA_DF]
+ )
+
+ delete_table_purchases = BigQueryDeleteTableOperator(
+ task_id="delete_table_purchases",
+ deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".purchases",
+ impersonation_chain=[TRF_SA_DF]
+ )
+
+ delete_table_customer_purchase_l1 = BigQueryDeleteTableOperator(
+ task_id="delete_table_customer_purchase_l1",
+ deletion_dataset_table=DTL_L1_PRJ+"."+DTL_L1_BQ_DATASET+".customer_purchase",
+ impersonation_chain=[TRF_SA_DF]
+ )
+
+ delete_table_customer_purchase_l2 = BigQueryDeleteTableOperator(
+ task_id="delete_table_customer_purchase_l2",
+ deletion_dataset_table=DTL_L2_PRJ+"."+DTL_L2_BQ_DATASET+".customer_purchase",
+ impersonation_chain=[TRF_SA_DF]
+ )
+
+ start >> delete_table >> end
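If you expect to re-run the demo, `BigQueryDeleteTableOperator` also accepts `ignore_if_missing`, so the cleanup DAG does not fail when a table is already gone; a hedged variant with a hypothetical table path:

```python
import datetime

from airflow import models
from airflow.providers.google.cloud.operators.bigquery import (
    BigQueryDeleteTableOperator,
)

with models.DAG(
        'delete_tables_safe_demo',  # hypothetical DAG, for illustration only
        start_date=datetime.datetime(2022, 1, 1),
        schedule_interval=None) as dag:
    delete_customers = BigQueryDeleteTableOperator(
        task_id='delete_table_customers_safe',
        deletion_dataset_table='dtl-0-prj.lake_0.customers',  # hypothetical
        ignore_if_missing=True,  # succeed even when the table does not exist
    )
```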
diff --git a/examples/data-solutions/data-platform-foundations/outputs.tf b/examples/data-solutions/data-platform-foundations/outputs.tf
index e5a2de3e..32e98fc6 100644
--- a/examples/data-solutions/data-platform-foundations/outputs.tf
+++ b/examples/data-solutions/data-platform-foundations/outputs.tf
@@ -98,7 +98,7 @@ output "demo_commands" {
03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/"
04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG."
05 = <+ additive, •
conditional.
| members | roles |
|---|---|
+|gcp-data-analysts group|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|gcp-data-engineers group|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
-|gcp-data-security group|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
-|dev-data-load-df-0 serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
-|dev-data-trf-df-0 serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
+|gcp-data-security group|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin) <br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
+|dev-data-load-df-0 serviceAccount|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
+|dev-data-trf-bq-0 serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
+|dev-data-trf-df-0 serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
## Project dev-data-dtl-0-0
| members | roles |
|---|---|
-|gcp-data-analysts group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|gcp-data-analysts group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|gcp-data-engineers group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
-|dev-data-load-df-0 serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
-|dev-data-trf-bq-0 serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
-|dev-data-trf-df-0 serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
+|SERVICE_IDENTITY_service-networking serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) + |
+|dev-data-load-df-0 serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
+|dev-data-trf-bq-0 serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
+|dev-data-trf-df-0 serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
## Project dev-data-dtl-1-0
| members | roles |
|---|---|
-|gcp-data-analysts group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|gcp-data-analysts group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|gcp-data-engineers group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
-|dev-data-trf-bq-0 serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
-|dev-data-trf-df-0 serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|SERVICE_IDENTITY_service-networking serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) + |
+|dev-data-load-df-0 serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
+|dev-data-trf-bq-0 serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
+|dev-data-trf-df-0 serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project dev-data-dtl-2-0
| members | roles |
|---|---|
-|gcp-data-analysts group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|gcp-data-analysts group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|gcp-data-engineers group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
-|dev-data-trf-bq-0 serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
-|dev-data-trf-df-0 serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
+|SERVICE_IDENTITY_service-networking serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) + |
+|dev-data-load-df-0 serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
+|dev-data-trf-bq-0 serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
+|dev-data-trf-df-0 serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
## Project dev-data-dtl-plg-0
| members | roles |
|---|---|
-|gcp-data-analysts group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|gcp-data-analysts group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|gcp-data-engineers group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
+|SERVICE_IDENTITY_service-networking serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) + |
## Project dev-data-lnd-0
@@ -62,37 +70,40 @@ Legend: + additive, • conditional.
| members | roles |
|---|---|
|gcp-data-engineers group|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
+|SERVICE_IDENTITY_dataflow-service-producer-prod serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|SERVICE_IDENTITY_service-networking serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) + |
|dev-data-load-df-0 serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|dev-data-orc-cmp-0 serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
-|service-426128559612 serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project dev-data-orc-0
| members | roles |
|---|---|
|gcp-data-engineers group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|SERVICE_IDENTITY_cloudcomposer-accounts serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|SERVICE_IDENTITY_service-networking serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) + |
|dev-data-load-df-0 serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|dev-data-orc-cmp-0 serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|dev-data-trf-df-0 serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
-|service-36960036774 serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project dev-data-trf-0
| members | roles |
|---|---|
|gcp-data-engineers group|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
+|SERVICE_IDENTITY_dataflow-service-producer-prod serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
+|SERVICE_IDENTITY_service-networking serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) + |
|dev-data-orc-cmp-0 serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|dev-data-trf-bq-0 serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|dev-data-trf-df-0 serviceAccount|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
-|service-883871192228 serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
## Project dev-net-spoke-0
| members | roles |
|---|---|
-|36960036774 serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + |
+|PROJECT_CLOUD_SERVICES serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + |
+|SERVICE_IDENTITY_cloudcomposer-accounts serviceAccount|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) + |
+|SERVICE_IDENTITY_container-engine-robot serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + <br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) + |
+|SERVICE_IDENTITY_dataflow-service-producer-prod serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + <br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + <br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + <br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) + |
|dev-data-load-df-0 serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + |
|dev-data-trf-df-0 serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + |
-|service-36960036774 serviceAccount|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) + <br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + <br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + <br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) + <br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) + |
-|service-426128559612 serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + |
-|service-883871192228 serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) + |
diff --git a/fast/stages/03-data-platform/dev/README.md b/fast/stages/03-data-platform/dev/README.md
index 19adb068..5ecc2ad5 100644
--- a/fast/stages/03-data-platform/dev/README.md
+++ b/fast/stages/03-data-platform/dev/README.md
@@ -50,6 +50,19 @@ Cloud KMS crypto keys can be configured either from the [FAST security stage](..
To configure the use of Cloud KMS on resources, you have to specify the key id in the `service_encryption_keys` variable. Key locations should match resource locations.
+## Data Catalog
+
+[Data Catalog](https://cloud.google.com/data-catalog) helps you document your data entries at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag templates](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest using `Tags` and `Tag templates`.
+
+The default configuration creates three policy tags:
+ - `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers.
+ - `2_Private`: policy tag for columns that include sensitive personally identifiable information (PII), such as a person's first name.
+ - `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit.
+
+Anything that is not tagged is available to all users who have access to the data warehouse.
+
+You can configure tags and their associated roles with the `data_catalog_tags` variable; a minimal example is shown below. We suggest using the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide when designing your tag structure and access patterns. By default, no group has access to tagged data.
+
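+A minimal sketch of overriding `data_catalog_tags` in `terraform.tfvars` (the group email is a placeholder): a `null` value creates the policy tag with no IAM bindings, while a `{ROLE => [MEMBERS]}` map also grants the listed members that role on the tag.
+
+```hcl
+data_catalog_tags = {
+  "3_Confidential" = null
+  "2_Private"      = null
+  "1_Sensitive" = {
+    "roles/datacatalog.categoryFineGrainedReader" = [
+      "group:gcp-data-analysts@example.com"
+    ]
+  }
+}
+```
+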
### VPC-SC
As is often the case in real-world configurations, [VPC-SC](https://cloud.google.com/vpc-service-controls) is needed to mitigate data exfiltration. VPC-SC can be configured from the [FAST security stage](../../02-security). This step is optional, but highly recommended, and depends on customer policies and security best practices.
@@ -116,6 +129,12 @@ terraform init
terraform apply
```
+## Demo pipeline
+
+The application layer is out of scope for this stage. For demo purposes only, several Cloud Composer DAGs are provided. The demos import data from the `landing` area to the `DataLake L2` dataset using different features.
+
+You can find examples in the [demo](../../../../examples/data-solutions/data-platform-foundations/demo) folder.
+
@@ -132,20 +151,21 @@ terraform apply
| name | description | type | required | default | producer |
|---|---|:---:|:---:|:---:|:---:|
| [billing_account](variables.tf#L17) | Billing account id and organization id ('nnnnnnnn' or null). | object({…}) | ✓ |  | 00-globals |
-| [folder_ids](variables.tf#L45) | Folder to be used for the networking resources in folders/nnnn format. | object({…}) | ✓ |  | 01-resman |
-| [host_project_ids](variables.tf#L63) | Shared VPC project ids. | object({…}) | ✓ |  | 02-networking |
-| [organization](variables.tf#L89) | Organization details. | object({…}) | ✓ |  | 00-globals |
-| [prefix](variables.tf#L105) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | string | ✓ |  | 00-globals |
+| [folder_ids](variables.tf#L56) | Folder to be used for the networking resources in folders/nnnn format. | object({…}) | ✓ |  | 01-resman |
+| [host_project_ids](variables.tf#L74) | Shared VPC project ids. | object({…}) | ✓ |  | 02-networking |
+| [organization](variables.tf#L100) | Organization details. | object({…}) | ✓ |  | 00-globals |
+| [prefix](variables.tf#L116) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | string | ✓ |  | 00-globals |
| [composer_config](variables.tf#L26) |  | object({…}) |  | {…} |  |
-| [data_force_destroy](variables.tf#L39) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | bool |  | false |  |
-| [groups](variables.tf#L53) | Groups. | map(string) |  | {…} |  |
-| [network_config_composer](variables.tf#L71) | Network configurations to use for Composer. | object({…}) |  | {…} |  |
-| [outputs_location](variables.tf#L99) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string |  | null |  |
-| [project_services](variables.tf#L111) | List of core services enabled on all projects. | list(string) |  | […] |  |
-| [region](variables.tf#L122) | Region used for regional resources. | string |  | "europe-west1" |  |
-| [service_encryption_keys](variables.tf#L128) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) |  | null |  |
-| [subnet_self_links](variables.tf#L140) | Shared VPC subnet self links. | object({…}) |  | null | 02-networking |
-| [vpc_self_links](variables.tf#L149) | Shared VPC self links. | object({…}) |  | null | 02-networking |
+| [data_catalog_tags](variables.tf#L39) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) |  | {…} |  |
+| [data_force_destroy](variables.tf#L50) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | bool |  | false |  |
+| [groups](variables.tf#L64) | Groups. | map(string) |  | {…} |  |
+| [network_config_composer](variables.tf#L82) | Network configurations to use for Composer. | object({…}) |  | {…} |  |
+| [outputs_location](variables.tf#L110) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string |  | null |  |
+| [project_services](variables.tf#L122) | List of core services enabled on all projects. | list(string) |  | […] |  |
+| [region](variables.tf#L133) | Region used for regional resources. | string |  | "europe-west1" |  |
+| [service_encryption_keys](variables.tf#L139) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) |  | null |  |
+| [subnet_self_links](variables.tf#L151) | Shared VPC subnet self links. | object({…}) |  | null | 02-networking |
+| [vpc_self_links](variables.tf#L160) | Shared VPC self links. | object({…}) |  | null | 02-networking |
## Outputs
diff --git a/fast/stages/03-data-platform/dev/main.tf b/fast/stages/03-data-platform/dev/main.tf
index c10380da..536e1873 100644
--- a/fast/stages/03-data-platform/dev/main.tf
+++ b/fast/stages/03-data-platform/dev/main.tf
@@ -21,6 +21,7 @@ module "data-platform" {
billing_account_id = var.billing_account.id
composer_config = var.composer_config
data_force_destroy = var.data_force_destroy
+ data_catalog_tags = var.data_catalog_tags
folder_id = var.folder_ids.data-platform
groups = var.groups
network_config = {
diff --git a/fast/stages/03-data-platform/dev/variables.tf b/fast/stages/03-data-platform/dev/variables.tf
index 1f65cf77..3b1645e4 100644
--- a/fast/stages/03-data-platform/dev/variables.tf
+++ b/fast/stages/03-data-platform/dev/variables.tf
@@ -36,6 +36,17 @@ variable "composer_config" {
}
}
+variable "data_catalog_tags" {
+ description = "List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format."
+ type = map(map(list(string)))
+ nullable = false
+ default = {
+ "3_Confidential" = null
+ "2_Private" = null
+ "1_Sensitive" = null
+ }
+}
+
variable "data_force_destroy" {
description = "Flag to set 'force_destroy' on data services like BigQery or Cloud Storage."
type = bool
diff --git a/tests/examples/data_solutions/data_platform_foundations/test_plan.py b/tests/examples/data_solutions/data_platform_foundations/test_plan.py
index 4857bf9f..e5db6ffc 100644
--- a/tests/examples/data_solutions/data_platform_foundations/test_plan.py
+++ b/tests/examples/data_solutions/data_platform_foundations/test_plan.py
@@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture')
def test_resources(e2e_plan_runner):
"Test that plan works and the numbers of resources is as expected."
modules, resources = e2e_plan_runner(FIXTURES_DIR)
- assert len(modules) == 40
- assert len(resources) == 296
+ assert len(modules) == 41
+ assert len(resources) == 313
diff --git a/tools/state_iam.py b/tools/state_iam.py
index 7817a7ee..42f9f76e 100755
--- a/tools/state_iam.py
+++ b/tools/state_iam.py
@@ -65,10 +65,22 @@ def get_bindings(resources, prefix=None, folders=None):
member_type, _, member_id = member.partition(':')
if member_type == 'user':
continue
- member_id = member_id.rpartition('@')[0]
+ member_id, member_domain = member_id.split('@', 1)
+ # Handle Cloud Services Service Account
+ if member_domain == 'cloudservices.gserviceaccount.com':
+ member_id = "PROJECT_CLOUD_SERVICES"
+ # Handle Cloud Service Identity service account
+ if re.match(r"^service-\d{8}", member_id):
+ member_id = "SERVICE_IDENTITY_" + member_domain.split(".", 1)[0]
+ # Handle BQ Cloud Service Identity service account
+ if re.match(r"^bq-\d{8}", member_id):
+ member_id = "IDENTITY_" + member_domain.split(".", 1)[0]
+ resource_type_output = "Service Identity - " + resource_type
+ else:
+ resource_type_output = resource_type
if prefix and member_id.startswith(prefix):
member_id = member_id[len(prefix) + 1:]
- yield Binding(authoritative, resource_type, resource_id, role,
+ yield Binding(authoritative, resource_type_output, resource_id, role,
member_type, member_id, conditions)