Merge pull request #607 from GoogleCloudPlatform/lcaggio/dp-dc-policy-tag
[Data Platform] Add Data Catalog Policy tag
This commit is contained in:
commit
56b89211a7
|
@ -67,6 +67,7 @@ resource "google_composer_environment" "orch-cmp-0" {
|
||||||
env_variables = merge(
|
env_variables = merge(
|
||||||
var.composer_config.env_variables, {
|
var.composer_config.env_variables, {
|
||||||
BQ_LOCATION = var.location
|
BQ_LOCATION = var.location
|
||||||
|
DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}")
|
||||||
DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "")
|
DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "")
|
||||||
DTL_L0_PRJ = module.lake-0-project.project_id
|
DTL_L0_PRJ = module.lake-0-project.project_id
|
||||||
DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id
|
DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id
|
||||||
|
|
|
@ -23,6 +23,7 @@ locals {
|
||||||
(local.groups.data-analysts) = [
|
(local.groups.data-analysts) = [
|
||||||
"roles/bigquery.dataViewer",
|
"roles/bigquery.dataViewer",
|
||||||
"roles/bigquery.jobUser",
|
"roles/bigquery.jobUser",
|
||||||
|
"roles/bigquery.metadataViewer",
|
||||||
"roles/bigquery.user",
|
"roles/bigquery.user",
|
||||||
"roles/datacatalog.viewer",
|
"roles/datacatalog.viewer",
|
||||||
"roles/datacatalog.tagTemplateViewer",
|
"roles/datacatalog.tagTemplateViewer",
|
||||||
|
@ -37,6 +38,7 @@ locals {
|
||||||
(local.groups.data-analysts) = [
|
(local.groups.data-analysts) = [
|
||||||
"roles/bigquery.dataEditor",
|
"roles/bigquery.dataEditor",
|
||||||
"roles/bigquery.jobUser",
|
"roles/bigquery.jobUser",
|
||||||
|
"roles/bigquery.metadataViewer",
|
||||||
"roles/bigquery.user",
|
"roles/bigquery.user",
|
||||||
"roles/datacatalog.viewer",
|
"roles/datacatalog.viewer",
|
||||||
"roles/datacatalog.tagTemplateViewer",
|
"roles/datacatalog.tagTemplateViewer",
|
||||||
|
@ -44,7 +46,7 @@ locals {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
lake_0_iam = {
|
lake_0_iam = {
|
||||||
"roles/bigquery.dataEditor" = [
|
"roles/bigquery.dataOwner" = [
|
||||||
module.load-sa-df-0.iam_email,
|
module.load-sa-df-0.iam_email,
|
||||||
module.transf-sa-df-0.iam_email,
|
module.transf-sa-df-0.iam_email,
|
||||||
module.transf-sa-bq-0.iam_email,
|
module.transf-sa-bq-0.iam_email,
|
||||||
|
@ -52,18 +54,24 @@ locals {
|
||||||
"roles/bigquery.jobUser" = [
|
"roles/bigquery.jobUser" = [
|
||||||
module.load-sa-df-0.iam_email,
|
module.load-sa-df-0.iam_email,
|
||||||
]
|
]
|
||||||
|
"roles/datacatalog.categoryAdmin" = [
|
||||||
|
module.transf-sa-bq-0.iam_email
|
||||||
|
]
|
||||||
"roles/storage.objectCreator" = [
|
"roles/storage.objectCreator" = [
|
||||||
module.load-sa-df-0.iam_email,
|
module.load-sa-df-0.iam_email,
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
lake_iam = {
|
lake_iam = {
|
||||||
"roles/bigquery.dataEditor" = [
|
"roles/bigquery.dataOwner" = [
|
||||||
module.transf-sa-df-0.iam_email,
|
module.transf-sa-df-0.iam_email,
|
||||||
module.transf-sa-bq-0.iam_email,
|
module.transf-sa-bq-0.iam_email,
|
||||||
]
|
]
|
||||||
"roles/bigquery.jobUser" = [
|
"roles/bigquery.jobUser" = [
|
||||||
module.transf-sa-bq-0.iam_email,
|
module.transf-sa-bq-0.iam_email,
|
||||||
]
|
]
|
||||||
|
"roles/datacatalog.categoryAdmin" = [
|
||||||
|
module.load-sa-df-0.iam_email
|
||||||
|
]
|
||||||
"roles/storage.objectCreator" = [
|
"roles/storage.objectCreator" = [
|
||||||
module.transf-sa-df-0.iam_email,
|
module.transf-sa-df-0.iam_email,
|
||||||
]
|
]
|
||||||
|
|
|
@ -21,6 +21,9 @@ module "common-project" {
|
||||||
prefix = var.prefix
|
prefix = var.prefix
|
||||||
name = "cmn${local.project_suffix}"
|
name = "cmn${local.project_suffix}"
|
||||||
group_iam = {
|
group_iam = {
|
||||||
|
(local.groups.data-analysts) = [
|
||||||
|
"roles/datacatalog.viewer",
|
||||||
|
]
|
||||||
(local.groups.data-engineers) = [
|
(local.groups.data-engineers) = [
|
||||||
"roles/dlp.reader",
|
"roles/dlp.reader",
|
||||||
"roles/dlp.user",
|
"roles/dlp.user",
|
||||||
|
@ -28,6 +31,7 @@ module "common-project" {
|
||||||
]
|
]
|
||||||
(local.groups.data-security) = [
|
(local.groups.data-security) = [
|
||||||
"roles/dlp.admin",
|
"roles/dlp.admin",
|
||||||
|
"roles/datacatalog.admin"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
iam = {
|
iam = {
|
||||||
|
@ -35,6 +39,17 @@ module "common-project" {
|
||||||
module.load-sa-df-0.iam_email,
|
module.load-sa-df-0.iam_email,
|
||||||
module.transf-sa-df-0.iam_email
|
module.transf-sa-df-0.iam_email
|
||||||
]
|
]
|
||||||
|
"roles/datacatalog.viewer" = [
|
||||||
|
module.load-sa-df-0.iam_email,
|
||||||
|
module.transf-sa-df-0.iam_email,
|
||||||
|
module.transf-sa-bq-0.iam_email
|
||||||
|
]
|
||||||
|
"roles/datacatalog.categoryFineGrainedReader" = [
|
||||||
|
module.transf-sa-df-0.iam_email,
|
||||||
|
module.transf-sa-bq-0.iam_email,
|
||||||
|
# Uncomment if you want to grant the `data-analysts` group access to all tagged columns.
|
||||||
|
# local.groups_iam.data-analysts
|
||||||
|
]
|
||||||
}
|
}
|
||||||
services = concat(var.project_services, [
|
services = concat(var.project_services, [
|
||||||
"datacatalog.googleapis.com",
|
"datacatalog.googleapis.com",
|
||||||
|
@ -42,6 +57,16 @@ module "common-project" {
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Data Catalog Policy tag
|
||||||
|
|
||||||
|
module "common-datacatalog" {
|
||||||
|
source = "../../../modules/data-catalog-policy-tag"
|
||||||
|
project_id = module.common-project.project_id
|
||||||
|
name = "${var.prefix}-datacatalog-policy-tags"
|
||||||
|
location = var.location
|
||||||
|
tags = var.data_catalog_tags
|
||||||
|
}
|
||||||
|
|
||||||
# To create KMS keys in the common project: uncomment this section and assign key links accordingly in the local.service_encryption_keys variable
|
# To create KMS keys in the common project: uncomment this section and assign key links accordingly in the local.service_encryption_keys variable
|
||||||
|
|
||||||
# module "cmn-kms-0" {
|
# module "cmn-kms-0" {
|
||||||
|
|
|
@ -6,45 +6,53 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
||||||
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin) <br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|
||||||
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
||||||
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|
||||||
|
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
||||||
|
|
||||||
## Project <i>dtl-0</i>
|
## Project <i>dtl-0</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|
||||||
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|
||||||
|
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
|
||||||
|
|
||||||
## Project <i>dtl-1</i>
|
## Project <i>dtl-1</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|
||||||
|
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
||||||
|
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|
|
||||||
## Project <i>dtl-2</i>
|
## Project <i>dtl-2</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|
||||||
|
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
||||||
|
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|
|
||||||
## Project <i>dtl-plg</i>
|
## Project <i>dtl-plg</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|
|
||||||
## Project <i>lnd</i>
|
## Project <i>lnd</i>
|
||||||
|
|
||||||
|
@ -62,6 +70,8 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
|
||||||
|
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
||||||
|
|
||||||
|
@ -69,7 +79,9 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|
||||||
|
@ -79,6 +91,8 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
||||||
|
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
|<b>orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
||||||
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
|<b>trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
||||||
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|
|
@ -154,6 +154,19 @@ Cloud Data Loss Prevention resources and templates should be stored in the secur
|
||||||
|
|
||||||
You can find more details and best practices on using DLP to De-identification and re-identification of PII in large-scale datasets in the [GCP documentation](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp).
|
You can find more details and best practices on using DLP to De-identification and re-identification of PII in large-scale datasets in the [GCP documentation](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp).
|
||||||
|
|
||||||
|
## Data Catalog
|
||||||
|
|
||||||
|
[Data Catalog](https://cloud.google.com/data-catalog) helps you to document your data entries at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag templates](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest using `Tags` and `Tag templates`.
|
||||||
|
|
||||||
|
The default configuration will implement 3 tags:
|
||||||
|
- `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers.
|
||||||
|
- `2_Private`: policy tag for columns that include sensitive personally identifiable information (PII), such as a person's first name.
|
||||||
|
- `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit.
|
||||||
|
|
||||||
|
Anything that is not tagged is available to all users who have access to the data warehouse.
|
||||||
|
|
||||||
|
For the purpose of the example, no group has access to tagged data. You can configure your tags and the associated roles through the `data_catalog_tags` variable. We suggest using the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide to designing your tag structure and access pattern.
|
||||||
|
|
||||||
## How to run this script
|
## How to run this script
|
||||||
|
|
||||||
To deploy this example on your GCP organization, you will need
|
To deploy this example on your GCP organization, you will need
|
||||||
|
@ -207,17 +220,10 @@ To do this, you need to remove IAM binging at project-level for the `data-analys
|
||||||
|
|
||||||
## Demo pipeline
|
## Demo pipeline
|
||||||
|
|
||||||
The application layer is out of scope of this script, but as a demo, it is provided with a Cloud Composer DAG to mode data from the `landing` area to the `DataLake L2` dataset.
|
The application layer is out of scope of this script. For demo purposes only, several Cloud Composer DAGs are provided. The demos import data from the `landing` area to the `DataLake L2` dataset using different features.
|
||||||
|
|
||||||
Just follow the commands you find in the `demo_commands` Terraform output, go in the Cloud Composer UI and run the `data_pipeline_dag`.
|
You can find examples in the [`demo`](./demo) folder.
|
||||||
|
|
||||||
Description of commands:
|
|
||||||
|
|
||||||
- 01: copy sample data to a `landing` Cloud Storage bucket impersonating the `load` service account.
|
|
||||||
- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
|
|
||||||
- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account.
|
|
||||||
- 04: Open the Cloud Composer Airflow UI and run the imported DAG.
|
|
||||||
- 05: Run the BigQuery query to see results.
|
|
||||||
<!-- BEGIN TFDOC -->
|
<!-- BEGIN TFDOC -->
|
||||||
|
|
||||||
## Variables
|
## Variables
|
||||||
|
@ -225,17 +231,18 @@ Description of commands:
|
||||||
| name | description | type | required | default |
|
| name | description | type | required | default |
|
||||||
|---|---|:---:|:---:|:---:|
|
|---|---|:---:|:---:|:---:|
|
||||||
| [billing_account_id](variables.tf#L17) | Billing account id. | <code>string</code> | ✓ | |
|
| [billing_account_id](variables.tf#L17) | Billing account id. | <code>string</code> | ✓ | |
|
||||||
| [folder_id](variables.tf#L42) | Folder to be used for the networking resources in folders/nnnn format. | <code>string</code> | ✓ | |
|
| [folder_id](variables.tf#L53) | Folder to be used for the networking resources in folders/nnnn format. | <code>string</code> | ✓ | |
|
||||||
| [organization_domain](variables.tf#L87) | Organization domain. | <code>string</code> | ✓ | |
|
| [organization_domain](variables.tf#L98) | Organization domain. | <code>string</code> | ✓ | |
|
||||||
| [prefix](variables.tf#L92) | Unique prefix used for resource names. | <code>string</code> | ✓ | |
|
| [prefix](variables.tf#L103) | Unique prefix used for resource names. | <code>string</code> | ✓ | |
|
||||||
| [composer_config](variables.tf#L22) | Cloud Composer config. | <code title="object({ node_count = number airflow_version = string env_variables = map(string) })">object({…})</code> | | <code title="{ node_count = 3 airflow_version = "composer-1.17.5-airflow-2.1.4" env_variables = {} }">{…}</code> |
|
| [composer_config](variables.tf#L22) | Cloud Composer config. | <code title="object({ node_count = number airflow_version = string env_variables = map(string) })">object({…})</code> | | <code title="{ node_count = 3 airflow_version = "composer-1.17.5-airflow-2.1.4" env_variables = {} }">{…}</code> |
|
||||||
| [data_force_destroy](variables.tf#L36) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
|
| [data_catalog_tags](variables.tf#L36) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> |
|
||||||
| [groups](variables.tf#L53) | User groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> |
|
| [data_force_destroy](variables.tf#L47) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
|
||||||
| [location](variables.tf#L47) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> |
|
| [groups](variables.tf#L64) | User groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> |
|
||||||
| [network_config](variables.tf#L63) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | <code title="object({ host_project = string network_self_link = string subnet_self_links = object({ load = string transformation = string orchestration = string }) composer_ip_ranges = object({ cloudsql = string gke_master = string web_server = string }) composer_secondary_ranges = object({ pods = string services = string }) })">object({…})</code> | | <code>null</code> |
|
| [location](variables.tf#L58) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> |
|
||||||
| [project_services](variables.tf#L97) | List of core services enabled on all projects. | <code>list(string)</code> | | <code title="[ "cloudresourcemanager.googleapis.com", "iam.googleapis.com", "serviceusage.googleapis.com", "stackdriver.googleapis.com" ]">[…]</code> |
|
| [network_config](variables.tf#L74) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | <code title="object({ host_project = string network_self_link = string subnet_self_links = object({ load = string transformation = string orchestration = string }) composer_ip_ranges = object({ cloudsql = string gke_master = string web_server = string }) composer_secondary_ranges = object({ pods = string services = string }) })">object({…})</code> | | <code>null</code> |
|
||||||
| [project_suffix](variables.tf#L108) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
|
| [project_services](variables.tf#L108) | List of core services enabled on all projects. | <code>list(string)</code> | | <code title="[ "cloudresourcemanager.googleapis.com", "iam.googleapis.com", "serviceusage.googleapis.com", "stackdriver.googleapis.com" ]">[…]</code> |
|
||||||
| [region](variables.tf#L114) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> |
|
| [project_suffix](variables.tf#L119) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
|
||||||
|
| [region](variables.tf#L125) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> |
|
||||||
|
|
||||||
## Outputs
|
## Outputs
|
||||||
|
|
||||||
|
@ -254,13 +261,6 @@ Description of commands:
|
||||||
|
|
||||||
Features to add in future releases:
|
Features to add in future releases:
|
||||||
|
|
||||||
- Add support for Column level access on BigQuery
|
|
||||||
- Add example templates for Data Catalog
|
|
||||||
- Add example on how to use Cloud Data Loss Prevention
|
- Add example on how to use Cloud Data Loss Prevention
|
||||||
- Add solution to handle Tables, Views, and Authorized Views lifecycle
|
- Add solution to handle Tables, Views, and Authorized Views lifecycle
|
||||||
- Add solution to handle Metadata lifecycle
|
- Add solution to handle Metadata lifecycle
|
||||||
|
|
||||||
## To Test/Fix
|
|
||||||
|
|
||||||
- Composer require "Require OS Login" not enforced
|
|
||||||
- External Shared-VPC
|
|
||||||
|
|
|
@ -1,3 +1,32 @@
|
||||||
# Data ingestion Demo
|
# Data ingestion Demo
|
||||||
|
|
||||||
In this folder you can find an example to ingest data on the `data platfoem` instantiated in [here](../). See details in the [README.m](../#demo-pipeline) to run the demo.
|
In this folder, you can find an example to ingest data on the `data platform` instantiated [here](../).
|
||||||
|
|
||||||
|
The example is not intended to be production-ready code.
|
||||||
|
|
||||||
|
## Demo use case
|
||||||
|
The demo imports purchase data generated by a store.
|
||||||
|
|
||||||
|
## Input files
|
||||||
|
Data are uploaded to the `landing` GCS bucket. File structure:
|
||||||
|
- `customers.csv`: Comma-separated values file with customer information in the following format: Customer ID, Name, Surname, Registration Timestamp
|
||||||
|
- `purchases.csv`: Comma-separated values file with purchase information in the following format: Item ID, Customer ID, Item, Item price, Purchase Timestamp
|
||||||
|
|
||||||
|
## Data processing pipelines
|
||||||
|
Different data pipelines are provided to highlight different features and patterns. For the purpose of the example, a single pipeline handles all data lifecycles. When adapting them to your real use case, you may want to evaluate the option to handle each functional step in a separate pipeline or a dedicated tool. For example, you may want to use `Dataform` to handle the data schema lifecycle.
|
||||||
|
|
||||||
|
Below you can find a description of each example:
|
||||||
|
- Simple import data: [`datapipeline.py`](./datapipeline.py) is a simple pipeline to import provided data from the `landing` Google Cloud Storage bucket to the Data Hub L2 layer, joining the `customers` and `purchases` tables into the `customer_purchase` table.
|
||||||
|
- Import data with Policy Tags: [`datapipeline_dc_tags.py`](./datapipeline_dc_tags.py) imports provided data from the `landing` bucket to the Data Hub L2 layer, protecting sensitive data using Data Catalog policy tags.
|
||||||
|
- Delete tables: [`delete_table.py`](./delete_table.py) deletes BigQuery tables created by import pipelines.
|
||||||
|
|
||||||
|
## Running the demo
|
||||||
|
To run the demo examples, follow these steps:
|
||||||
|
|
||||||
|
- 01: copy sample data to the `landing` Cloud Storage bucket impersonating the `load` service account.
|
||||||
|
- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
|
||||||
|
- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account.
|
||||||
|
- 04: Open the Cloud Composer Airflow UI and run the imported DAG.
|
||||||
|
- 05: Run the BigQuery query to see results.
|
||||||
|
|
||||||
|
You can find pre-computed commands in the `demo_commands` output variable of the deployed terraform [data pipeline](../).
|
||||||
|
|
|
@ -19,18 +19,21 @@
|
||||||
import csv
|
import csv
|
||||||
import datetime
|
import datetime
|
||||||
import io
|
import io
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from airflow import models
|
from airflow import models
|
||||||
from airflow.contrib.operators.dataflow_operator import DataflowTemplateOperator
|
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
|
||||||
from airflow.operators import dummy
|
from airflow.operators import dummy
|
||||||
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator
|
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator
|
||||||
|
from airflow.utils.task_group import TaskGroup
|
||||||
|
|
||||||
# --------------------------------------------------------------------------------
|
# --------------------------------------------------------------------------------
|
||||||
# Set variables
|
# Set variables - Needed for the DEMO
|
||||||
# ------------------------------------------------------------
|
# --------------------------------------------------------------------------------
|
||||||
BQ_LOCATION = os.environ.get("BQ_LOCATION")
|
BQ_LOCATION = os.environ.get("BQ_LOCATION")
|
||||||
|
# Data Catalog policy tags arrive as a JSON-encoded map in the environment.
# Fall back to an empty map when the variable is unset or empty so that
# parsing the DAG file does not fail with a TypeError/JSONDecodeError.
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS") or "{}")
|
||||||
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
|
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
|
||||||
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
|
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
|
||||||
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
|
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
|
||||||
|
@ -84,7 +87,6 @@ default_args = {
|
||||||
'retries': 1,
|
'retries': 1,
|
||||||
'retry_delay': datetime.timedelta(minutes=5),
|
'retry_delay': datetime.timedelta(minutes=5),
|
||||||
'dataflow_default_options': {
|
'dataflow_default_options': {
|
||||||
'project': LOD_PRJ,
|
|
||||||
'location': DF_REGION,
|
'location': DF_REGION,
|
||||||
'zone': DF_ZONE,
|
'zone': DF_ZONE,
|
||||||
'stagingLocation': LOD_GCS_STAGING,
|
'stagingLocation': LOD_GCS_STAGING,
|
||||||
|
@ -114,9 +116,13 @@ with models.DAG(
|
||||||
trigger_rule='all_success'
|
trigger_rule='all_success'
|
||||||
)
|
)
|
||||||
|
|
||||||
customers_import = DataflowTemplateOperator(
|
# BigQuery tables are automatically created for demo purposes.
|
||||||
task_id="dataflow_customer_import",
|
# Consider a dedicated pipeline or tool for a real life scenario.
|
||||||
|
customers_import = DataflowTemplatedJobStartOperator(
|
||||||
|
task_id="dataflow_customers_import",
|
||||||
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
|
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
|
||||||
|
project_id=LOD_PRJ,
|
||||||
|
location=DF_REGION,
|
||||||
parameters={
|
parameters={
|
||||||
"javascriptTextTransformFunctionName": "transform",
|
"javascriptTextTransformFunctionName": "transform",
|
||||||
"JSONPath": ORC_GCS + "/customers_schema.json",
|
"JSONPath": ORC_GCS + "/customers_schema.json",
|
||||||
|
@ -127,9 +133,11 @@ with models.DAG(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
purchases_import = DataflowTemplateOperator(
|
purchases_import = DataflowTemplatedJobStartOperator(
|
||||||
task_id="dataflow_purchases_import",
|
task_id="dataflow_purchases_import",
|
||||||
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
|
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
|
||||||
|
project_id=LOD_PRJ,
|
||||||
|
location=DF_REGION,
|
||||||
parameters={
|
parameters={
|
||||||
"javascriptTextTransformFunctionName": "transform",
|
"javascriptTextTransformFunctionName": "transform",
|
||||||
"JSONPath": ORC_GCS + "/purchases_schema.json",
|
"JSONPath": ORC_GCS + "/purchases_schema.json",
|
||||||
|
@ -180,13 +188,13 @@ with models.DAG(
|
||||||
'jobType':'QUERY',
|
'jobType':'QUERY',
|
||||||
'query':{
|
'query':{
|
||||||
'query':"""SELECT
|
'query':"""SELECT
|
||||||
customer_id,
|
customer_id,
|
||||||
purchase_id,
|
purchase_id,
|
||||||
name,
|
name,
|
||||||
surname,
|
surname,
|
||||||
item,
|
item,
|
||||||
price,
|
price,
|
||||||
timestamp
|
timestamp
|
||||||
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
|
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
|
||||||
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
|
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
|
||||||
'destinationTable':{
|
'destinationTable':{
|
||||||
|
@ -201,4 +209,4 @@ with models.DAG(
|
||||||
impersonation_chain=[TRF_SA_BQ]
|
impersonation_chain=[TRF_SA_BQ]
|
||||||
)
|
)
|
||||||
|
|
||||||
start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end
|
start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end
|
|
@ -0,0 +1,322 @@
|
||||||
|
# Copyright 2022 Google LLC
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Load The Dependencies
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import datetime
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
from airflow import models
|
||||||
|
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
|
||||||
|
from airflow.operators import dummy
|
||||||
|
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator
|
||||||
|
from airflow.utils.task_group import TaskGroup
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Set variables - Needed for the DEMO
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
BQ_LOCATION = os.environ.get("BQ_LOCATION")
|
||||||
|
# Data Catalog policy tags arrive as a JSON-encoded map in the environment.
# Fall back to an empty map when the variable is unset or empty so that
# parsing the DAG file does not fail with a TypeError/JSONDecodeError.
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS") or "{}")
|
||||||
|
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
|
||||||
|
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
|
||||||
|
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
|
||||||
|
DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
|
||||||
|
DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
|
||||||
|
DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
|
||||||
|
DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
|
||||||
|
DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
|
||||||
|
DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
|
||||||
|
DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
|
||||||
|
DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
|
||||||
|
DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
|
||||||
|
GCP_REGION = os.environ.get("GCP_REGION")
|
||||||
|
LND_PRJ = os.environ.get("LND_PRJ")
|
||||||
|
LND_BQ = os.environ.get("LND_BQ")
|
||||||
|
LND_GCS = os.environ.get("LND_GCS")
|
||||||
|
LND_PS = os.environ.get("LND_PS")
|
||||||
|
LOD_PRJ = os.environ.get("LOD_PRJ")
|
||||||
|
LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
|
||||||
|
LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
|
||||||
|
LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET")
|
||||||
|
LOD_SA_DF = os.environ.get("LOD_SA_DF")
|
||||||
|
ORC_PRJ = os.environ.get("ORC_PRJ")
|
||||||
|
ORC_GCS = os.environ.get("ORC_GCS")
|
||||||
|
TRF_PRJ = os.environ.get("TRF_PRJ")
|
||||||
|
TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING")
|
||||||
|
TRF_NET_VPC = os.environ.get("TRF_NET_VPC")
|
||||||
|
TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET")
|
||||||
|
TRF_SA_DF = os.environ.get("TRF_SA_DF")
|
||||||
|
TRF_SA_BQ = os.environ.get("TRF_SA_BQ")
|
||||||
|
DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "")
|
||||||
|
DF_REGION = os.environ.get("GCP_REGION")
|
||||||
|
DF_ZONE = os.environ.get("GCP_REGION") + "-b"
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Set default arguments
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# If you are running Airflow in more than one time zone
|
||||||
|
# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
|
||||||
|
# for best practices
|
||||||
|
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
|
||||||
|
|
||||||
|
default_args = {
|
||||||
|
'owner': 'airflow',
|
||||||
|
'start_date': yesterday,
|
||||||
|
'depends_on_past': False,
|
||||||
|
'email': [''],
|
||||||
|
'email_on_failure': False,
|
||||||
|
'email_on_retry': False,
|
||||||
|
'retries': 1,
|
||||||
|
'retry_delay': datetime.timedelta(minutes=5),
|
||||||
|
'dataflow_default_options': {
|
||||||
|
'location': DF_REGION,
|
||||||
|
'zone': DF_ZONE,
|
||||||
|
'stagingLocation': LOD_GCS_STAGING,
|
||||||
|
'tempLocation': LOD_GCS_STAGING + "/tmp",
|
||||||
|
'serviceAccountEmail': LOD_SA_DF,
|
||||||
|
'subnetwork': LOD_NET_SUBNET,
|
||||||
|
'ipConfiguration': "WORKER_IP_PRIVATE",
|
||||||
|
'kmsKeyName' : DF_KMS_KEY
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Main DAG
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
with models.DAG(
|
||||||
|
'data_pipeline_dc_tags_dag',
|
||||||
|
default_args=default_args,
|
||||||
|
schedule_interval=None) as dag:
|
||||||
|
start = dummy.DummyOperator(
|
||||||
|
task_id='start',
|
||||||
|
trigger_rule='all_success'
|
||||||
|
)
|
||||||
|
|
||||||
|
end = dummy.DummyOperator(
|
||||||
|
task_id='end',
|
||||||
|
trigger_rule='all_success'
|
||||||
|
)
|
||||||
|
|
||||||
|
# BigQuery tables are created here for demo purposes.
|
||||||
|
# Consider a dedicated pipeline or tool for a real life scenario.
|
||||||
|
with TaskGroup('upsert_table') as upsert_table:
|
||||||
|
upsert_table_customers = BigQueryUpsertTableOperator(
|
||||||
|
task_id="upsert_table_customers",
|
||||||
|
project_id=DTL_L0_PRJ,
|
||||||
|
dataset_id=DTL_L0_BQ_DATASET,
|
||||||
|
impersonation_chain=[TRF_SA_DF],
|
||||||
|
table_resource={
|
||||||
|
"tableReference": {"tableId": "customers"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
upsert_table_purchases = BigQueryUpsertTableOperator(
|
||||||
|
task_id="upsert_table_purchases",
|
||||||
|
project_id=DTL_L0_PRJ,
|
||||||
|
dataset_id=DTL_L0_BQ_DATASET,
|
||||||
|
impersonation_chain=[TRF_SA_BQ],
|
||||||
|
table_resource={
|
||||||
|
"tableReference": {"tableId": "purchases"}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
upsert_table_customer_purchase_l1 = BigQueryUpsertTableOperator(
|
||||||
|
task_id="upsert_table_customer_purchase_l1",
|
||||||
|
project_id=DTL_L1_PRJ,
|
||||||
|
dataset_id=DTL_L1_BQ_DATASET,
|
||||||
|
impersonation_chain=[TRF_SA_BQ],
|
||||||
|
table_resource={
|
||||||
|
"tableReference": {"tableId": "customer_purchase"}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
upsert_table_customer_purchase_l2 = BigQueryUpsertTableOperator(
|
||||||
|
task_id="upsert_table_customer_purchase_l2",
|
||||||
|
project_id=DTL_L2_PRJ,
|
||||||
|
dataset_id=DTL_L2_BQ_DATASET,
|
||||||
|
impersonation_chain=[TRF_SA_BQ],
|
||||||
|
table_resource={
|
||||||
|
"tableReference": {"tableId": "customer_purchase"}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# BigQuery table schemas are defined here for demo purposes.
|
||||||
|
# Consider a dedicated pipeline or tool for a real life scenario.
|
||||||
|
with TaskGroup('update_schema_table') as update_schema_table:
|
||||||
|
update_table_schema_customers = BigQueryUpdateTableSchemaOperator(
|
||||||
|
task_id="update_table_schema_customers",
|
||||||
|
project_id=DTL_L0_PRJ,
|
||||||
|
dataset_id=DTL_L0_BQ_DATASET,
|
||||||
|
table_id="customers",
|
||||||
|
impersonation_chain=[TRF_SA_BQ],
|
||||||
|
include_policy_tags=True,
|
||||||
|
schema_fields_updates=[
|
||||||
|
{ "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" },
|
||||||
|
{ "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
|
||||||
|
{ "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
|
||||||
|
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
update_table_schema_customers = BigQueryUpdateTableSchemaOperator(
|
||||||
|
task_id="update_table_schema_purchases",
|
||||||
|
project_id=DTL_L0_PRJ,
|
||||||
|
dataset_id=DTL_L0_BQ_DATASET,
|
||||||
|
table_id="purchases",
|
||||||
|
impersonation_chain=[TRF_SA_BQ],
|
||||||
|
include_policy_tags=True,
|
||||||
|
schema_fields_updates=[
|
||||||
|
{ "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" },
|
||||||
|
{ "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
|
||||||
|
{ "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
|
||||||
|
{ "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
|
||||||
|
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
update_table_schema_customer_purchase_l1 = BigQueryUpdateTableSchemaOperator(
|
||||||
|
task_id="update_table_schema_customer_purchase_l1",
|
||||||
|
project_id=DTL_L1_PRJ,
|
||||||
|
dataset_id=DTL_L1_BQ_DATASET,
|
||||||
|
table_id="customer_purchase",
|
||||||
|
impersonation_chain=[TRF_SA_BQ],
|
||||||
|
include_policy_tags=True,
|
||||||
|
schema_fields_updates=[
|
||||||
|
{ "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
|
||||||
|
{ "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" },
|
||||||
|
{ "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
|
||||||
|
{ "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
|
||||||
|
{ "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
|
||||||
|
{ "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
|
||||||
|
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
update_table_schema_customer_purchase_l2 = BigQueryUpdateTableSchemaOperator(
|
||||||
|
task_id="update_table_schema_customer_purchase_l2",
|
||||||
|
project_id=DTL_L2_PRJ,
|
||||||
|
dataset_id=DTL_L2_BQ_DATASET,
|
||||||
|
table_id="customer_purchase",
|
||||||
|
impersonation_chain=[TRF_SA_BQ],
|
||||||
|
include_policy_tags=True,
|
||||||
|
schema_fields_updates=[
|
||||||
|
{ "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" },
|
||||||
|
{ "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" },
|
||||||
|
{ "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}},
|
||||||
|
{ "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} },
|
||||||
|
{ "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" },
|
||||||
|
{ "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" },
|
||||||
|
{ "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" }
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
customers_import = DataflowTemplatedJobStartOperator(
|
||||||
|
task_id="dataflow_customers_import",
|
||||||
|
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
|
||||||
|
project_id=LOD_PRJ,
|
||||||
|
location=DF_REGION,
|
||||||
|
parameters={
|
||||||
|
"javascriptTextTransformFunctionName": "transform",
|
||||||
|
"JSONPath": ORC_GCS + "/customers_schema.json",
|
||||||
|
"javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js",
|
||||||
|
"inputFilePattern": LND_GCS + "/customers.csv",
|
||||||
|
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".customers",
|
||||||
|
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
purchases_import = DataflowTemplatedJobStartOperator(
|
||||||
|
task_id="dataflow_purchases_import",
|
||||||
|
template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
|
||||||
|
project_id=LOD_PRJ,
|
||||||
|
location=DF_REGION,
|
||||||
|
parameters={
|
||||||
|
"javascriptTextTransformFunctionName": "transform",
|
||||||
|
"JSONPath": ORC_GCS + "/purchases_schema.json",
|
||||||
|
"javascriptTextTransformGcsPath": ORC_GCS + "/purchases_udf.js",
|
||||||
|
"inputFilePattern": LND_GCS + "/purchases.csv",
|
||||||
|
"outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".purchases",
|
||||||
|
"bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
join_customer_purchase = BigQueryInsertJobOperator(
|
||||||
|
task_id='bq_join_customer_purchase',
|
||||||
|
gcp_conn_id='bigquery_default',
|
||||||
|
project_id=TRF_PRJ,
|
||||||
|
location=BQ_LOCATION,
|
||||||
|
configuration={
|
||||||
|
'jobType':'QUERY',
|
||||||
|
'query':{
|
||||||
|
'query':"""SELECT
|
||||||
|
c.id as customer_id,
|
||||||
|
p.id as purchase_id,
|
||||||
|
c.name as name,
|
||||||
|
c.surname as surname,
|
||||||
|
p.item as item,
|
||||||
|
p.price as price,
|
||||||
|
p.timestamp as timestamp
|
||||||
|
FROM `{dtl_0_prj}.{dtl_0_dataset}.customers` c
|
||||||
|
JOIN `{dtl_0_prj}.{dtl_0_dataset}.purchases` p ON c.id = p.customer_id
|
||||||
|
""".format(dtl_0_prj=DTL_L0_PRJ, dtl_0_dataset=DTL_L0_BQ_DATASET, ),
|
||||||
|
'destinationTable':{
|
||||||
|
'projectId': DTL_L1_PRJ,
|
||||||
|
'datasetId': DTL_L1_BQ_DATASET,
|
||||||
|
'tableId': 'customer_purchase'
|
||||||
|
},
|
||||||
|
'writeDisposition':'WRITE_TRUNCATE',
|
||||||
|
"useLegacySql": False
|
||||||
|
}
|
||||||
|
},
|
||||||
|
impersonation_chain=[TRF_SA_BQ]
|
||||||
|
)
|
||||||
|
|
||||||
|
l2_customer_purchase = BigQueryInsertJobOperator(
|
||||||
|
task_id='bq_l2_customer_purchase',
|
||||||
|
gcp_conn_id='bigquery_default',
|
||||||
|
project_id=TRF_PRJ,
|
||||||
|
location=BQ_LOCATION,
|
||||||
|
configuration={
|
||||||
|
'jobType':'QUERY',
|
||||||
|
'query':{
|
||||||
|
'query':"""SELECT
|
||||||
|
customer_id,
|
||||||
|
purchase_id,
|
||||||
|
name,
|
||||||
|
surname,
|
||||||
|
item,
|
||||||
|
price,
|
||||||
|
timestamp
|
||||||
|
FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase`
|
||||||
|
""".format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ),
|
||||||
|
'destinationTable':{
|
||||||
|
'projectId': DTL_L2_PRJ,
|
||||||
|
'datasetId': DTL_L2_BQ_DATASET,
|
||||||
|
'tableId': 'customer_purchase'
|
||||||
|
},
|
||||||
|
'writeDisposition':'WRITE_TRUNCATE',
|
||||||
|
"useLegacySql": False
|
||||||
|
}
|
||||||
|
},
|
||||||
|
impersonation_chain=[TRF_SA_BQ]
|
||||||
|
)
|
||||||
|
start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end
|
|
@ -0,0 +1,146 @@
|
||||||
|
# Copyright 2022 Google LLC
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Load The Dependencies
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import datetime
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
from airflow import models
|
||||||
|
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
|
||||||
|
from airflow.operators import dummy
|
||||||
|
from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteTableOperator
|
||||||
|
from airflow.utils.task_group import TaskGroup
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Set variables - Needed for the DEMO
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
BQ_LOCATION = os.environ.get("BQ_LOCATION")
|
||||||
|
# Policy-tag map passed to the Composer environment as a JSON string.
# Fall back to an empty map when the variable is unset or empty so that the
# DAG file still parses instead of failing with TypeError/JSONDecodeError.
DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS") or "{}")
|
||||||
|
DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ")
|
||||||
|
DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET")
|
||||||
|
DTL_L0_GCS = os.environ.get("DTL_L0_GCS")
|
||||||
|
DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ")
|
||||||
|
DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET")
|
||||||
|
DTL_L1_GCS = os.environ.get("DTL_L1_GCS")
|
||||||
|
DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ")
|
||||||
|
DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET")
|
||||||
|
DTL_L2_GCS = os.environ.get("DTL_L2_GCS")
|
||||||
|
DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ")
|
||||||
|
DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET")
|
||||||
|
DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS")
|
||||||
|
GCP_REGION = os.environ.get("GCP_REGION")
|
||||||
|
LND_PRJ = os.environ.get("LND_PRJ")
|
||||||
|
LND_BQ = os.environ.get("LND_BQ")
|
||||||
|
LND_GCS = os.environ.get("LND_GCS")
|
||||||
|
LND_PS = os.environ.get("LND_PS")
|
||||||
|
LOD_PRJ = os.environ.get("LOD_PRJ")
|
||||||
|
LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING")
|
||||||
|
LOD_NET_VPC = os.environ.get("LOD_NET_VPC")
|
||||||
|
LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET")
|
||||||
|
LOD_SA_DF = os.environ.get("LOD_SA_DF")
|
||||||
|
ORC_PRJ = os.environ.get("ORC_PRJ")
|
||||||
|
ORC_GCS = os.environ.get("ORC_GCS")
|
||||||
|
TRF_PRJ = os.environ.get("TRF_PRJ")
|
||||||
|
TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING")
|
||||||
|
TRF_NET_VPC = os.environ.get("TRF_NET_VPC")
|
||||||
|
TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET")
|
||||||
|
TRF_SA_DF = os.environ.get("TRF_SA_DF")
|
||||||
|
TRF_SA_BQ = os.environ.get("TRF_SA_BQ")
|
||||||
|
DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "")
|
||||||
|
DF_REGION = os.environ.get("GCP_REGION")
|
||||||
|
DF_ZONE = os.environ.get("GCP_REGION") + "-b"
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Set default arguments
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# If you are running Airflow in more than one time zone
|
||||||
|
# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
|
||||||
|
# for best practices
|
||||||
|
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
|
||||||
|
|
||||||
|
default_args = {
|
||||||
|
'owner': 'airflow',
|
||||||
|
'start_date': yesterday,
|
||||||
|
'depends_on_past': False,
|
||||||
|
'email': [''],
|
||||||
|
'email_on_failure': False,
|
||||||
|
'email_on_retry': False,
|
||||||
|
'retries': 1,
|
||||||
|
'retry_delay': datetime.timedelta(minutes=5),
|
||||||
|
'dataflow_default_options': {
|
||||||
|
'location': DF_REGION,
|
||||||
|
'zone': DF_ZONE,
|
||||||
|
'stagingLocation': LOD_GCS_STAGING,
|
||||||
|
'tempLocation': LOD_GCS_STAGING + "/tmp",
|
||||||
|
'serviceAccountEmail': LOD_SA_DF,
|
||||||
|
'subnetwork': LOD_NET_SUBNET,
|
||||||
|
'ipConfiguration': "WORKER_IP_PRIVATE",
|
||||||
|
'kmsKeyName' : DF_KMS_KEY
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
# Main DAG
|
||||||
|
# --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
with models.DAG(
|
||||||
|
'delete_tables_dag',
|
||||||
|
default_args=default_args,
|
||||||
|
schedule_interval=None) as dag:
|
||||||
|
start = dummy.DummyOperator(
|
||||||
|
task_id='start',
|
||||||
|
trigger_rule='all_success'
|
||||||
|
)
|
||||||
|
|
||||||
|
end = dummy.DummyOperator(
|
||||||
|
task_id='end',
|
||||||
|
trigger_rule='all_success'
|
||||||
|
)
|
||||||
|
|
||||||
|
# BigQuery tables are deleted here for demo purposes.
|
||||||
|
# Consider a dedicated pipeline or tool for a real life scenario.
|
||||||
|
with TaskGroup('delete_table') as delte_table:
|
||||||
|
delete_table_customers = BigQueryDeleteTableOperator(
|
||||||
|
task_id="delete_table_customers",
|
||||||
|
deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".customers",
|
||||||
|
impersonation_chain=[TRF_SA_DF]
|
||||||
|
)
|
||||||
|
|
||||||
|
delete_table_purchases = BigQueryDeleteTableOperator(
|
||||||
|
task_id="delete_table_purchases",
|
||||||
|
deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".purchases",
|
||||||
|
impersonation_chain=[TRF_SA_DF]
|
||||||
|
)
|
||||||
|
|
||||||
|
delete_table_customer_purchase_l1 = BigQueryDeleteTableOperator(
|
||||||
|
task_id="delete_table_customer_purchase_l1",
|
||||||
|
deletion_dataset_table=DTL_L1_PRJ+"."+DTL_L1_BQ_DATASET+".customer_purchase",
|
||||||
|
impersonation_chain=[TRF_SA_DF]
|
||||||
|
)
|
||||||
|
|
||||||
|
delete_table_customer_purchase_l2 = BigQueryDeleteTableOperator(
|
||||||
|
task_id="delete_table_customer_purchase_l2",
|
||||||
|
deletion_dataset_table=DTL_L2_PRJ+"."+DTL_L2_BQ_DATASET+".customer_purchase",
|
||||||
|
impersonation_chain=[TRF_SA_DF]
|
||||||
|
)
|
||||||
|
|
||||||
|
start >> delte_table >> end
|
|
@ -98,7 +98,7 @@ output "demo_commands" {
|
||||||
03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/"
|
03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/"
|
||||||
04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG."
|
04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG."
|
||||||
05 = <<EOT
|
05 = <<EOT
|
||||||
bq query --project_id=${module.lake-2-project.project_id} --use_legacy_sql=false 'SELECT * FROM `${module.lake-2-project.project_id}.${module.lake-2-bq-0.dataset_id}.customer_purchase` LIMIT 1000'"
|
bq query --project_id=${module.lake-2-project.project_id} --use_legacy_sql=false 'SELECT * EXCEPT (name, surname) FROM `${module.lake-2-project.project_id}.${module.lake-2-bq-0.dataset_id}.customer_purchase` LIMIT 1000'"
|
||||||
EOT
|
EOT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
[{"mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID"}, {"mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": {"names": ["projects/yoyo-cmn/locations/eu/taxonomies/3505167253647667255/policyTags/2896949743213674289"]}}, {"mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": {"names": ["projects/yoyo-cmn/locations/eu/taxonomies/3505167253647667255/policyTags/2896949743213674289"]}}, {"mode": "REQUIRED", "name": "credicard", "type": "INTEGER", "description": "credicard", "policyTags": {"names": ["projects/yoyo-cmn/locations/eu/taxonomies/3505167253647667255/policyTags/1008821537023566954"]}}, {"mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp"}]
|
|
@ -33,6 +33,17 @@ variable "composer_config" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "data_catalog_tags" {
|
||||||
|
description = "List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format."
|
||||||
|
type = map(map(list(string)))
|
||||||
|
nullable = false
|
||||||
|
default = {
|
||||||
|
"3_Confidential" = null
|
||||||
|
"2_Private" = null
|
||||||
|
"1_Sensitive" = null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
variable "data_force_destroy" {
|
variable "data_force_destroy" {
|
||||||
description = "Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage."
|
description = "Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage."
|
||||||
type = bool
|
type = bool
|
||||||
|
|
|
@ -6,45 +6,53 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin) <br>[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
||||||
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|
|<b>gcp-data-security</b><br><small><i>group</i></small>|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin) <br>[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) |
|
||||||
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
||||||
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) |
|
||||||
|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) |
|
||||||
|
|
||||||
## Project <i>dev-data-dtl-0-0</i>
|
## Project <i>dev-data-dtl-0-0</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) |
|
||||||
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|
||||||
|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) |
|
||||||
|
|
||||||
## Project <i>dev-data-dtl-1-0</i>
|
## Project <i>dev-data-dtl-1-0</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|
||||||
|
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
||||||
|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|
|
||||||
## Project <i>dev-data-dtl-2-0</i>
|
## Project <i>dev-data-dtl-2-0</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) |
|
||||||
|
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
||||||
|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) <br>[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|
|
||||||
## Project <i>dev-data-dtl-plg-0</i>
|
## Project <i>dev-data-dtl-plg-0</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>gcp-data-analysts</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer) <br>[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user) <br>[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer) <br>[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) |
|
||||||
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|
|
||||||
## Project <i>dev-data-lnd-0</i>
|
## Project <i>dev-data-lnd-0</i>
|
||||||
|
|
||||||
|
@ -62,37 +70,40 @@ Legend: <code>+</code> additive, <code>•</code> conditional.
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer) <br>[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) |
|
||||||
|
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) <br>[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
||||||
|<b>service-426128559612</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|
||||||
|
|
||||||
## Project <i>dev-data-orc-0</i>
|
## Project <i>dev-data-orc-0</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor) <br>[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin) <br>[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor) <br>[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) <br>[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) |
|
||||||
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker) <br>[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) |
|
||||||
|<b>service-36960036774</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|
||||||
|
|
||||||
## Project <i>dev-data-trf-0</i>
|
## Project <i>dev-data-trf-0</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
|<b>gcp-data-engineers</b><br><small><i>group</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) <br>[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
||||||
|
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|
|<b>SERVICE_IDENTITY_service-networking</b><br><small><i>serviceAccount</i></small>|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) <code>+</code>|
|
||||||
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
|<b>dev-data-orc-cmp-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) |
|
||||||
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
|<b>dev-data-trf-bq-0</b><br><small><i>serviceAccount</i></small>|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) |
|
||||||
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker) <br>[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
||||||
|<b>service-883871192228</b><br><small><i>serviceAccount</i></small>|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) |
|
|
||||||
|
|
||||||
## Project <i>dev-net-spoke-0</i>
|
## Project <i>dev-net-spoke-0</i>
|
||||||
|
|
||||||
| members | roles |
|
| members | roles |
|
||||||
|---|---|
|
|---|---|
|
||||||
|<b>36960036774</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|
|<b>PROJECT_CLOUD_SERVICES</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|
||||||
|
|<b>SERVICE_IDENTITY_cloudcomposer-accounts</b><br><small><i>serviceAccount</i></small>|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) <code>+</code>|
|
||||||
|
|<b>SERVICE_IDENTITY_container-engine-robot</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code>|
|
||||||
|
|<b>SERVICE_IDENTITY_dataflow-service-producer-prod</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code>|
|
||||||
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|
|<b>dev-data-load-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|
||||||
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|
|<b>dev-data-trf-df-0</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|
||||||
|<b>service-36960036774</b><br><small><i>serviceAccount</i></small>|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code><br>[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) <code>+</code>|
|
|
||||||
|<b>service-426128559612</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|
|
||||||
|<b>service-883871192228</b><br><small><i>serviceAccount</i></small>|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) <code>+</code>|
|
|
||||||
|
|
|
@ -50,6 +50,19 @@ Cloud KMS crypto keys can be configured wither from the [FAST security stage](..
|
||||||
|
|
||||||
To configure the use of Cloud KMS on resources, you have to specify the key id on the `service_encryption_keys` variable. Key locations should match resource locations.
|
To configure the use of Cloud KMS on resources, you have to specify the key id on the `service_encryption_keys` variable. Key locations should match resource locations.
|
||||||
|
|
||||||
|
## Data Catalog
|
||||||
|
|
||||||
|
[Data Catalog](https://cloud.google.com/data-catalog) helps you to document your data entries at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag templates](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest using `Tags` and `Tag templates`.
|
||||||
|
|
||||||
|
The default configuration will implement 3 tags:
|
||||||
|
- `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers.
|
||||||
|
- `2_Private`: policy tag for columns that include sensitive personally identifiable information (PII), such as a person's first name.
|
||||||
|
- `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit.
|
||||||
|
|
||||||
|
Anything that is not tagged is available to all users who have access to the data warehouse.
|
||||||
|
|
||||||
|
You can configure your tags and the roles associated with them through the `data_catalog_tags` variable. We suggest using the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide to designing your tag structure and access pattern. By default, no group has access to tagged data.
|
||||||
|
|
||||||
### VPC-SC
|
### VPC-SC
|
||||||
|
|
||||||
As is often the case in real-world configurations, [VPC-SC](https://cloud.google.com/vpc-service-controls) is needed to mitigate data exfiltration. VPC-SC can be configured from the [FAST security stage](../../02-security). This step is optional, but highly recommended, and depends on customer policies and security best practices.
|
As is often the case in real-world configurations, [VPC-SC](https://cloud.google.com/vpc-service-controls) is needed to mitigate data exfiltration. VPC-SC can be configured from the [FAST security stage](../../02-security). This step is optional, but highly recommended, and depends on customer policies and security best practices.
|
||||||
|
@ -116,6 +129,12 @@ terraform init
|
||||||
terraform apply
|
terraform apply
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Demo pipeline
|
||||||
|
|
||||||
|
The application layer is out of scope of this script. For demo purposes only, several Cloud Composer DAGs are provided. The demos import data from the `landing` area to the `DataLake L2` dataset using different features.
|
||||||
|
|
||||||
|
You can find examples in the [`demo`](../../../../examples/data-solutions/data-platform-foundations/demo) folder.
|
||||||
|
|
||||||
<!-- TFDOC OPTS files:1 show_extra:1 -->
|
<!-- TFDOC OPTS files:1 show_extra:1 -->
|
||||||
<!-- BEGIN TFDOC -->
|
<!-- BEGIN TFDOC -->
|
||||||
|
|
||||||
|
@ -132,20 +151,21 @@ terraform apply
|
||||||
| name | description | type | required | default | producer |
|
| name | description | type | required | default | producer |
|
||||||
|---|---|:---:|:---:|:---:|:---:|
|
|---|---|:---:|:---:|:---:|:---:|
|
||||||
| [billing_account](variables.tf#L17) | Billing account id and organization id ('nnnnnnnn' or null). | <code title="object({ id = string organization_id = number })">object({…})</code> | ✓ | | <code>00-globals</code> |
|
| [billing_account](variables.tf#L17) | Billing account id and organization id ('nnnnnnnn' or null). | <code title="object({ id = string organization_id = number })">object({…})</code> | ✓ | | <code>00-globals</code> |
|
||||||
| [folder_ids](variables.tf#L45) | Folder to be used for the networking resources in folders/nnnn format. | <code title="object({ data-platform = string })">object({…})</code> | ✓ | | <code>01-resman</code> |
|
| [folder_ids](variables.tf#L56) | Folder to be used for the networking resources in folders/nnnn format. | <code title="object({ data-platform = string })">object({…})</code> | ✓ | | <code>01-resman</code> |
|
||||||
| [host_project_ids](variables.tf#L63) | Shared VPC project ids. | <code title="object({ dev-spoke-0 = string })">object({…})</code> | ✓ | | <code>02-networking</code> |
|
| [host_project_ids](variables.tf#L74) | Shared VPC project ids. | <code title="object({ dev-spoke-0 = string })">object({…})</code> | ✓ | | <code>02-networking</code> |
|
||||||
| [organization](variables.tf#L89) | Organization details. | <code title="object({ domain = string id = number customer_id = string })">object({…})</code> | ✓ | | <code>00-globals</code> |
|
| [organization](variables.tf#L100) | Organization details. | <code title="object({ domain = string id = number customer_id = string })">object({…})</code> | ✓ | | <code>00-globals</code> |
|
||||||
| [prefix](variables.tf#L105) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | <code>string</code> | ✓ | | <code>00-globals</code> |
|
| [prefix](variables.tf#L116) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | <code>string</code> | ✓ | | <code>00-globals</code> |
|
||||||
| [composer_config](variables.tf#L26) | | <code title="object({ node_count = number airflow_version = string env_variables = map(string) })">object({…})</code> | | <code title="{ node_count = 3 airflow_version = "composer-1.17.5-airflow-2.1.4" env_variables = {} }">{…}</code> | |
|
| [composer_config](variables.tf#L26) | | <code title="object({ node_count = number airflow_version = string env_variables = map(string) })">object({…})</code> | | <code title="{ node_count = 3 airflow_version = "composer-1.17.5-airflow-2.1.4" env_variables = {} }">{…}</code> | |
|
||||||
| [data_force_destroy](variables.tf#L39) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | <code>bool</code> | | <code>false</code> | |
|
| [data_catalog_tags](variables.tf#L39) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> | |
|
||||||
| [groups](variables.tf#L53) | Groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> | |
|
| [data_force_destroy](variables.tf#L50) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | <code>bool</code> | | <code>false</code> | |
|
||||||
| [network_config_composer](variables.tf#L71) | Network configurations to use for Composer. | <code title="object({ cloudsql_range = string gke_master_range = string gke_pods_name = string gke_services_name = string web_server_range = string })">object({…})</code> | | <code title="{ cloudsql_range = "192.168.254.0/24" gke_master_range = "192.168.255.0/28" gke_pods_name = "pods" gke_services_name = "services" web_server_range = "192.168.255.16/28" }">{…}</code> | |
|
| [groups](variables.tf#L64) | Groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> | |
|
||||||
| [outputs_location](variables.tf#L99) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | <code>string</code> | | <code>null</code> | |
|
| [network_config_composer](variables.tf#L82) | Network configurations to use for Composer. | <code title="object({ cloudsql_range = string gke_master_range = string gke_pods_name = string gke_services_name = string web_server_range = string })">object({…})</code> | | <code title="{ cloudsql_range = "192.168.254.0/24" gke_master_range = "192.168.255.0/28" gke_pods_name = "pods" gke_services_name = "services" web_server_range = "192.168.255.16/28" }">{…}</code> | |
|
||||||
| [project_services](variables.tf#L111) | List of core services enabled on all projects. | <code>list(string)</code> | | <code title="[ "cloudresourcemanager.googleapis.com", "iam.googleapis.com", "serviceusage.googleapis.com", "stackdriver.googleapis.com" ]">[…]</code> | |
|
| [outputs_location](variables.tf#L110) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | <code>string</code> | | <code>null</code> | |
|
||||||
| [region](variables.tf#L122) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> | |
|
| [project_services](variables.tf#L122) | List of core services enabled on all projects. | <code>list(string)</code> | | <code title="[ "cloudresourcemanager.googleapis.com", "iam.googleapis.com", "serviceusage.googleapis.com", "stackdriver.googleapis.com" ]">[…]</code> | |
|
||||||
| [service_encryption_keys](variables.tf#L128) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = string composer = string dataflow = string storage = string pubsub = string })">object({…})</code> | | <code>null</code> | |
|
| [region](variables.tf#L133) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> | |
|
||||||
| [subnet_self_links](variables.tf#L140) | Shared VPC subnet self links. | <code title="object({ dev-spoke-0 = map(string) })">object({…})</code> | | <code>null</code> | <code>02-networking</code> |
|
| [service_encryption_keys](variables.tf#L139) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = string composer = string dataflow = string storage = string pubsub = string })">object({…})</code> | | <code>null</code> | |
|
||||||
| [vpc_self_links](variables.tf#L149) | Shared VPC self links. | <code title="object({ dev-spoke-0 = string })">object({…})</code> | | <code>null</code> | <code>02-networking</code> |
|
| [subnet_self_links](variables.tf#L151) | Shared VPC subnet self links. | <code title="object({ dev-spoke-0 = map(string) })">object({…})</code> | | <code>null</code> | <code>02-networking</code> |
|
||||||
|
| [vpc_self_links](variables.tf#L160) | Shared VPC self links. | <code title="object({ dev-spoke-0 = string })">object({…})</code> | | <code>null</code> | <code>02-networking</code> |
|
||||||
|
|
||||||
## Outputs
|
## Outputs
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ module "data-platform" {
|
||||||
billing_account_id = var.billing_account.id
|
billing_account_id = var.billing_account.id
|
||||||
composer_config = var.composer_config
|
composer_config = var.composer_config
|
||||||
data_force_destroy = var.data_force_destroy
|
data_force_destroy = var.data_force_destroy
|
||||||
|
data_catalog_tags = var.data_catalog_tags
|
||||||
folder_id = var.folder_ids.data-platform
|
folder_id = var.folder_ids.data-platform
|
||||||
groups = var.groups
|
groups = var.groups
|
||||||
network_config = {
|
network_config = {
|
||||||
|
|
|
@ -36,6 +36,17 @@ variable "composer_config" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "data_catalog_tags" {
|
||||||
|
description = "List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format."
|
||||||
|
type = map(map(list(string)))
|
||||||
|
nullable = false
|
||||||
|
default = {
|
||||||
|
"3_Confidential" = null
|
||||||
|
"2_Private" = null
|
||||||
|
"1_Sensitive" = null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
variable "data_force_destroy" {
|
variable "data_force_destroy" {
|
||||||
description = "Flag to set 'force_destroy' on data services like BigQery or Cloud Storage."
|
description = "Flag to set 'force_destroy' on data services like BigQery or Cloud Storage."
|
||||||
type = bool
|
type = bool
|
||||||
|
|
|
@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture')
|
||||||
def test_resources(e2e_plan_runner):
|
def test_resources(e2e_plan_runner):
|
||||||
"Test that plan works and the numbers of resources is as expected."
|
"Test that plan works and the numbers of resources is as expected."
|
||||||
modules, resources = e2e_plan_runner(FIXTURES_DIR)
|
modules, resources = e2e_plan_runner(FIXTURES_DIR)
|
||||||
assert len(modules) == 40
|
assert len(modules) == 41
|
||||||
assert len(resources) == 296
|
assert len(resources) == 313
|
||||||
|
|
|
@ -65,10 +65,22 @@ def get_bindings(resources, prefix=None, folders=None):
|
||||||
member_type, _, member_id = member.partition(':')
|
member_type, _, member_id = member.partition(':')
|
||||||
if member_type == 'user':
|
if member_type == 'user':
|
||||||
continue
|
continue
|
||||||
member_id = member_id.rpartition('@')[0]
|
member_id, member_domain = member_id.split('@', 1)
|
||||||
|
# Handle Cloud Services Service Account
|
||||||
|
if member_domain == 'cloudservices.gserviceaccount.com':
|
||||||
|
member_id = "PROJECT_CLOUD_SERVICES"
|
||||||
|
# Handle Cloud Service Identity Service Account
|
||||||
|
if re.match("^service-\d{8}", member_id):
|
||||||
|
member_id = "SERVICE_IDENTITY_" + member_domain.split(".", 1)[0]
|
||||||
|
# Handle BQ Cloud Service Identity Service Account
|
||||||
|
if re.match("^bq-\d{8}", member_id):
|
||||||
|
member_id = "IDENTITY_" + member_domain.split(".", 1)[0]
|
||||||
|
resource_type_output = "Service Identity - " + resource_type
|
||||||
|
else:
|
||||||
|
resource_type_output = resource_type
|
||||||
if prefix and member_id.startswith(prefix):
|
if prefix and member_id.startswith(prefix):
|
||||||
member_id = member_id[len(prefix) + 1:]
|
member_id = member_id[len(prefix) + 1:]
|
||||||
yield Binding(authoritative, resource_type, resource_id, role,
|
yield Binding(authoritative, resource_type_output, resource_id, role,
|
||||||
member_type, member_id, conditions)
|
member_type, member_id, conditions)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue