From d6fb5e4b62a8ecc101a7abc4549e3aa2d153aad0 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Tue, 29 Mar 2022 12:06:45 +0200 Subject: [PATCH 01/15] policy tag --- .../data-platform-foundations/03-composer.tf | 1 + .../data-platform-foundations/06-common.tf | 15 +++++++++++++++ .../data-platform-foundations/variables.tf | 11 +++++++++++ 3 files changed, 27 insertions(+) diff --git a/examples/data-solutions/data-platform-foundations/03-composer.tf b/examples/data-solutions/data-platform-foundations/03-composer.tf index 231d0cc5..5a107534 100644 --- a/examples/data-solutions/data-platform-foundations/03-composer.tf +++ b/examples/data-solutions/data-platform-foundations/03-composer.tf @@ -67,6 +67,7 @@ resource "google_composer_environment" "orch-cmp-0" { env_variables = merge( var.composer_config.env_variables, { BQ_LOCATION = var.location + DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog[0].tags), "{}") DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "") DTL_L0_PRJ = module.lake-0-project.project_id DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id diff --git a/examples/data-solutions/data-platform-foundations/06-common.tf b/examples/data-solutions/data-platform-foundations/06-common.tf index cc18a46f..b17bbba6 100644 --- a/examples/data-solutions/data-platform-foundations/06-common.tf +++ b/examples/data-solutions/data-platform-foundations/06-common.tf @@ -42,6 +42,21 @@ module "common-project" { ]) } +# Data Catalog Policy tag + +module "common-datacatalog" { + count = var.data_catalog_tags != null ? 
1 : 0 + source = "../../../modules/data-catalog-policy-tag" + name = "${var.prefix}-datacatalog-policy-tags" + project_id = module.common-project.project_id + location = var.location + tags = var.data_catalog_tags + iam = { + "roles/datacatalog.categoryAdmin" = [local.groups_iam.data-security] + "roles/datacatalog.categoryFineGrainedReader" = [local.groups_iam.data-analysts] + } +} + # To create KMS keys in the common projet: uncomment this section and assigne key links accondingly in local.service_encryption_keys variable # module "cmn-kms-0" { diff --git a/examples/data-solutions/data-platform-foundations/variables.tf b/examples/data-solutions/data-platform-foundations/variables.tf index 39a9cbfb..1fecd04d 100644 --- a/examples/data-solutions/data-platform-foundations/variables.tf +++ b/examples/data-solutions/data-platform-foundations/variables.tf @@ -33,6 +33,17 @@ variable "composer_config" { } } +variable "data_catalog_tags" { + description = "List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format." + type = map(map(list(string))) + nullable = false + default = { + high = null + medium = null + low = null + } +} + variable "data_force_destroy" { description = "Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage." type = bool From 730d6d50b193ee8b0bb2bd75d5b3ba75822d235b Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Wed, 30 Mar 2022 17:53:48 +0200 Subject: [PATCH 02/15] Update README. 
--- .../data-platform-foundations/README.md | 13 +++++++++++++ .../data-platform-foundations/variables.tf | 6 +++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/examples/data-solutions/data-platform-foundations/README.md b/examples/data-solutions/data-platform-foundations/README.md index 8b243251..a776a405 100644 --- a/examples/data-solutions/data-platform-foundations/README.md +++ b/examples/data-solutions/data-platform-foundations/README.md @@ -154,6 +154,19 @@ Cloud Data Loss Prevention resources and templates should be stored in the secur You can find more details and best practices on using DLP to De-identification and re-identification of PII in large-scale datasets in the [GCP documentation](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp). +## Data Catalog + +[Data Catalog](https://cloud.google.com/data-catalog) helps you to document your data entry at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag template](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest to use `Tags` and `Tag templates`. + +The default configuration will implement 3 tags: + - `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers. + - `2_Private`: policy tag for columns that include sensitive personal identifiable information (PII) information, such as a person's first name. + - `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit. + +Anything that is not tagged is available to all users who have access to the data warehouse. + +For the porpuse of the example no groups has access to tagged data. 
You can configure your tags and roles associated by configuring the `data_catalog_tags` variable. We suggest useing the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide to designing your tags structure and access pattern. + ## How to run this script To deploy this example on your GCP organization, you will need diff --git a/examples/data-solutions/data-platform-foundations/variables.tf b/examples/data-solutions/data-platform-foundations/variables.tf index 1fecd04d..79d1ae4b 100644 --- a/examples/data-solutions/data-platform-foundations/variables.tf +++ b/examples/data-solutions/data-platform-foundations/variables.tf @@ -38,9 +38,9 @@ variable "data_catalog_tags" { type = map(map(list(string))) nullable = false default = { - high = null - medium = null - low = null + "3_Confidential" = null + "2_Private" = null + "1_Sensitive" = null } } From 14a0fbf33655e5b6001b1e1867f14a4ebb658ea0 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Thu, 31 Mar 2022 09:34:34 +0200 Subject: [PATCH 03/15] Update pipeline example. 
--- .../demo/datapipeline.py | 44 ++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py index fd633ebd..eb3de182 100644 --- a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py +++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py @@ -25,11 +25,11 @@ import os from airflow import models from airflow.contrib.operators.dataflow_operator import DataflowTemplateOperator from airflow.operators import dummy -from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator +from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator # -------------------------------------------------------------------------------- -# Set variables -# ------------------------------------------------------------ +# Set variables - Needed for the DEMO +# -------------------------------------------------------------------------------- BQ_LOCATION = os.environ.get("BQ_LOCATION") DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ") DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET") @@ -114,6 +114,40 @@ with models.DAG( trigger_rule='all_success' ) + upsert_table_customers = BigQueryUpsertTableOperator( + task_id="upsert_table_customers", + project_id=DTL_L0_PRJ, + dataset_id=DTL_L0_BQ_DATASET, + impersonation_chain=[TRF_SA_DF], + table_resource={ + "tableReference": {"tableId": "customers"}, + "schema": { + "field": [ + { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name" }, #, "policyTags": { "names": [] } }, + { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname" }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + }, + }, + ) 
+ + upsert_table_purchasess = BigQueryUpsertTableOperator( + task_id="upsert_table_purchasess", + project_id=DTL_L0_PRJ, + dataset_id=DTL_L0_BQ_DATASET, + table_resource={ + "tableReference": {"tableId": "purchases"}, + "schema": [ + { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, + { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + }, + ) + customers_import = DataflowTemplateOperator( task_id="dataflow_customer_import", template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery", @@ -200,5 +234,5 @@ with models.DAG( }, impersonation_chain=[TRF_SA_BQ] ) - - start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end + start >> upsert_table_customers >> end + # start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end From 1061c6ebc9a75891054599774f5a430996b293b3 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Thu, 31 Mar 2022 11:14:14 +0200 Subject: [PATCH 04/15] Update DAG pipeline --- .../data-platform-foundations/demo/datapipeline.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py index eb3de182..5ddce024 100644 --- a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py +++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py @@ -19,6 +19,7 @@ import csv import datetime import io +import json import logging import os @@ -31,6 +32,7 @@ from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJob # Set variables - Needed for the 
DEMO # -------------------------------------------------------------------------------- BQ_LOCATION = os.environ.get("BQ_LOCATION") +DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS")) DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ") DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET") DTL_L0_GCS = os.environ.get("DTL_L0_GCS") From 3f933bb129b51d0f8a01bd1debf5343ba444b637 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Fri, 1 Apr 2022 18:25:05 +0200 Subject: [PATCH 05/15] Update pipeline example and fix roles. --- .../data-platform-foundations/05-datalake.tf | 12 +- .../data-platform-foundations/06-common.tf | 19 ++- .../data-platform-foundations/demo/README.md | 2 +- .../demo/datapipeline.py | 150 +++++++++++++----- .../data-platform-foundations/outputs.tf | 2 +- .../data-platform-foundations/schema.json | 1 + 6 files changed, 142 insertions(+), 44 deletions(-) create mode 100644 examples/data-solutions/data-platform-foundations/schema.json diff --git a/examples/data-solutions/data-platform-foundations/05-datalake.tf b/examples/data-solutions/data-platform-foundations/05-datalake.tf index 64ec1b24..b163f9e5 100644 --- a/examples/data-solutions/data-platform-foundations/05-datalake.tf +++ b/examples/data-solutions/data-platform-foundations/05-datalake.tf @@ -23,6 +23,7 @@ locals { (local.groups.data-analysts) = [ "roles/bigquery.dataViewer", "roles/bigquery.jobUser", + "roles/bigquery.metadataViewer", "roles/bigquery.user", "roles/datacatalog.viewer", "roles/datacatalog.tagTemplateViewer", @@ -37,6 +38,7 @@ locals { (local.groups.data-analysts) = [ "roles/bigquery.dataEditor", "roles/bigquery.jobUser", + "roles/bigquery.metadataViewer", "roles/bigquery.user", "roles/datacatalog.viewer", "roles/datacatalog.tagTemplateViewer", @@ -44,7 +46,7 @@ locals { ] } lake_0_iam = { - "roles/bigquery.dataEditor" = [ + "roles/bigquery.dataOwner" = [ module.load-sa-df-0.iam_email, module.transf-sa-df-0.iam_email, module.transf-sa-bq-0.iam_email, @@ -52,18 +54,24 @@ 
locals { "roles/bigquery.jobUser" = [ module.load-sa-df-0.iam_email, ] + "roles/datacatalog.categoryAdmin" = [ + module.transf-sa-bq-0.iam_email + ] "roles/storage.objectCreator" = [ module.load-sa-df-0.iam_email, ] } lake_iam = { - "roles/bigquery.dataEditor" = [ + "roles/bigquery.dataOwner" = [ module.transf-sa-df-0.iam_email, module.transf-sa-bq-0.iam_email, ] "roles/bigquery.jobUser" = [ module.transf-sa-bq-0.iam_email, ] + "roles/datacatalog.categoryAdmin" = [ + module.load-sa-df-0.iam_email + ] "roles/storage.objectCreator" = [ module.transf-sa-df-0.iam_email, ] diff --git a/examples/data-solutions/data-platform-foundations/06-common.tf b/examples/data-solutions/data-platform-foundations/06-common.tf index b17bbba6..b04e2f87 100644 --- a/examples/data-solutions/data-platform-foundations/06-common.tf +++ b/examples/data-solutions/data-platform-foundations/06-common.tf @@ -21,6 +21,9 @@ module "common-project" { prefix = var.prefix name = "cmn${local.project_suffix}" group_iam = { + (local.groups.data-analysts) = [ + "roles/datacatalog.viewer", + ] (local.groups.data-engineers) = [ "roles/dlp.reader", "roles/dlp.user", @@ -28,6 +31,7 @@ module "common-project" { ] (local.groups.data-security) = [ "roles/dlp.admin", + "roles/datacatalog.admin" ] } iam = { @@ -35,6 +39,17 @@ module "common-project" { module.load-sa-df-0.iam_email, module.transf-sa-df-0.iam_email ] + "roles/datacatalog.viewer" = [ + module.load-sa-df-0.iam_email, + module.transf-sa-df-0.iam_email, + module.transf-sa-bq-0.iam_email + ] + "roles/datacatalog.categoryFineGrainedReader" = [ + module.transf-sa-df-0.iam_email, + module.transf-sa-bq-0.iam_email, + # Uncomment if you want to grant access to `data-analyst` to all columns tagged. 
+ # local.groups_iam.data-analysts + ] } services = concat(var.project_services, [ "datacatalog.googleapis.com", @@ -51,10 +66,6 @@ module "common-datacatalog" { project_id = module.common-project.project_id location = var.location tags = var.data_catalog_tags - iam = { - "roles/datacatalog.categoryAdmin" = [local.groups_iam.data-security] - "roles/datacatalog.categoryFineGrainedReader" = [local.groups_iam.data-analysts] - } } # To create KMS keys in the common projet: uncomment this section and assigne key links accondingly in local.service_encryption_keys variable diff --git a/examples/data-solutions/data-platform-foundations/demo/README.md b/examples/data-solutions/data-platform-foundations/demo/README.md index 78297f7a..2cac9a9a 100644 --- a/examples/data-solutions/data-platform-foundations/demo/README.md +++ b/examples/data-solutions/data-platform-foundations/demo/README.md @@ -1,3 +1,3 @@ # Data ingestion Demo -In this folder you can find an example to ingest data on the `data platfoem` instantiated in [here](../). See details in the [README.m](../#demo-pipeline) to run the demo. \ No newline at end of file +In this folder you can find an example to ingest data on the `data platform` instantiated in [here](../). See details in the [README.m](../#demo-pipeline) to run the demo. 
\ No newline at end of file diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py index 5ddce024..6a1a06be 100644 --- a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py +++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py @@ -24,9 +24,10 @@ import logging import os from airflow import models -from airflow.contrib.operators.dataflow_operator import DataflowTemplateOperator +from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator from airflow.operators import dummy -from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator +from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator +from airflow.utils.task_group import TaskGroup # -------------------------------------------------------------------------------- # Set variables - Needed for the DEMO @@ -86,7 +87,6 @@ default_args = { 'retries': 1, 'retry_delay': datetime.timedelta(minutes=5), 'dataflow_default_options': { - 'project': LOD_PRJ, 'location': DF_REGION, 'zone': DF_ZONE, 'stagingLocation': LOD_GCS_STAGING, @@ -116,43 +116,120 @@ with models.DAG( trigger_rule='all_success' ) - upsert_table_customers = BigQueryUpsertTableOperator( + with TaskGroup('upsert_table') as upsert_table: + upsert_table_customers = BigQueryUpsertTableOperator( task_id="upsert_table_customers", project_id=DTL_L0_PRJ, dataset_id=DTL_L0_BQ_DATASET, impersonation_chain=[TRF_SA_DF], table_resource={ "tableReference": {"tableId": "customers"}, - "schema": { - "field": [ - { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name" }, #, "policyTags": { "names": [] } }, - { "mode": "REQUIRED", "name": 
"surname", "type": "STRING", "description": "Surname" }, - { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } - ] - }, }, - ) + ) - upsert_table_purchasess = BigQueryUpsertTableOperator( - task_id="upsert_table_purchasess", + upsert_table_purchases = BigQueryUpsertTableOperator( + task_id="upsert_table_purchases", project_id=DTL_L0_PRJ, dataset_id=DTL_L0_BQ_DATASET, + impersonation_chain=[TRF_SA_BQ], table_resource={ - "tableReference": {"tableId": "purchases"}, - "schema": [ - { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, - { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, - { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } - ] + "tableReference": {"tableId": "purchases"} }, - ) + ) - customers_import = DataflowTemplateOperator( - task_id="dataflow_customer_import", + upsert_table_customer_purchase_l1 = BigQueryUpsertTableOperator( + task_id="upsert_table_customer_purchase_l1", + project_id=DTL_L1_PRJ, + dataset_id=DTL_L1_BQ_DATASET, + impersonation_chain=[TRF_SA_BQ], + table_resource={ + "tableReference": {"tableId": "customer_purchase"} + }, + ) + + upsert_table_customer_purchase_l2 = BigQueryUpsertTableOperator( + task_id="upsert_table_customer_purchase_l2", + project_id=DTL_L2_PRJ, + dataset_id=DTL_L2_BQ_DATASET, + impersonation_chain=[TRF_SA_BQ], + table_resource={ + "tableReference": {"tableId": "customer_purchase"} + }, + ) + + with TaskGroup('update_schema_table') as update_schema_table: + update_table_schema_customers = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_customers", + project_id=DTL_L0_PRJ, + dataset_id=DTL_L0_BQ_DATASET, + table_id="customers", + impersonation_chain=[TRF_SA_BQ], + 
include_policy_tags=True, + schema_fields_updates=[ + { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}}, + { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + ) + + update_table_schema_customers = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_purchases", + project_id=DTL_L0_PRJ, + dataset_id=DTL_L0_BQ_DATASET, + table_id="purchases", + impersonation_chain=[TRF_SA_BQ], + include_policy_tags=True, + schema_fields_updates=[ + { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, + { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + ) + + update_table_schema_customer_purchase_l1 = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_customer_purchase_l1", + project_id=DTL_L1_PRJ, + dataset_id=DTL_L1_BQ_DATASET, + table_id="customer_purchase", + impersonation_chain=[TRF_SA_BQ], + include_policy_tags=True, + schema_fields_updates=[ + { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}}, + { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", 
"policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} }, + { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, + { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + ) + + update_table_schema_customer_purchase_l2 = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_customer_purchase_l2", + project_id=DTL_L2_PRJ, + dataset_id=DTL_L2_BQ_DATASET, + table_id="customer_purchase", + impersonation_chain=[TRF_SA_BQ], + include_policy_tags=True, + schema_fields_updates=[ + { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}}, + { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} }, + { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, + { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + ) + + customers_import = DataflowTemplatedJobStartOperator( + task_id="dataflow_customers_import", template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery", + project_id=LOD_PRJ, + location=DF_REGION, parameters={ "javascriptTextTransformFunctionName": "transform", "JSONPath": ORC_GCS + "/customers_schema.json", @@ -163,9 +240,11 @@ with models.DAG( }, ) - purchases_import = DataflowTemplateOperator( + purchases_import = DataflowTemplatedJobStartOperator( task_id="dataflow_purchases_import", template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery", + 
project_id=LOD_PRJ, + location=DF_REGION, parameters={ "javascriptTextTransformFunctionName": "transform", "JSONPath": ORC_GCS + "/purchases_schema.json", @@ -216,13 +295,13 @@ with models.DAG( 'jobType':'QUERY', 'query':{ 'query':"""SELECT - customer_id, - purchase_id, - name, - surname, - item, - price, - timestamp + customer_id, + purchase_id, + name, + surname, + item, + price, + timestamp FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase` """.format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ), 'destinationTable':{ @@ -236,5 +315,4 @@ with models.DAG( }, impersonation_chain=[TRF_SA_BQ] ) - start >> upsert_table_customers >> end - # start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end + start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end diff --git a/examples/data-solutions/data-platform-foundations/outputs.tf b/examples/data-solutions/data-platform-foundations/outputs.tf index e5a2de3e..32e98fc6 100644 --- a/examples/data-solutions/data-platform-foundations/outputs.tf +++ b/examples/data-solutions/data-platform-foundations/outputs.tf @@ -98,7 +98,7 @@ output "demo_commands" { 03 = "gsutil -i ${module.orch-sa-cmp-0.email} cp demo/*.py ${google_composer_environment.orch-cmp-0.config[0].dag_gcs_prefix}/" 04 = "Open ${google_composer_environment.orch-cmp-0.config.0.airflow_uri} and run uploaded DAG." 
05 = < Date: Fri, 1 Apr 2022 18:38:56 +0200 Subject: [PATCH 06/15] Update FAST data foundation integration --- fast/stages/03-data-platform/dev/README.md | 13 +++++++++++++ fast/stages/03-data-platform/dev/main.tf | 1 + fast/stages/03-data-platform/dev/variables.tf | 11 +++++++++++ 3 files changed, 25 insertions(+) diff --git a/fast/stages/03-data-platform/dev/README.md b/fast/stages/03-data-platform/dev/README.md index 19adb068..57010a9a 100644 --- a/fast/stages/03-data-platform/dev/README.md +++ b/fast/stages/03-data-platform/dev/README.md @@ -50,6 +50,19 @@ Cloud KMS crypto keys can be configured wither from the [FAST security stage](.. To configure the use of Cloud KMS on resources, you have to specify the key id on the `service_encryption_keys` variable. Key locations should match resource locations. +## Data Catalog + +[Data Catalog](https://cloud.google.com/data-catalog) helps you to document your data entry at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag template](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest to use `Tags` and `Tag templates`. + +The default configuration will implement 3 tags: + - `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers. + - `2_Private`: policy tag for columns that include sensitive personal identifiable information (PII) information, such as a person's first name. + - `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit. + +Anything that is not tagged is available to all users who have access to the data warehouse. 
+ +You can configure your tags and roles associated by configuring the `data_catalog_tags` variable. We suggest useing the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide to designing your tags structure and access pattern. By default, no groups has access to tagged data. + ### VPC-SC As is often the case in real-world configurations, [VPC-SC](https://cloud.google.com/vpc-service-controls) is needed to mitigate data exfiltration. VPC-SC can be configured from the [FAST security stage](../../02-security). This step is optional, but highly recomended, and depends on customer policies and security best practices. diff --git a/fast/stages/03-data-platform/dev/main.tf b/fast/stages/03-data-platform/dev/main.tf index c10380da..536e1873 100644 --- a/fast/stages/03-data-platform/dev/main.tf +++ b/fast/stages/03-data-platform/dev/main.tf @@ -21,6 +21,7 @@ module "data-platform" { billing_account_id = var.billing_account.id composer_config = var.composer_config data_force_destroy = var.data_force_destroy + data_catalog_tags = var.data_catalog_tags folder_id = var.folder_ids.data-platform groups = var.groups network_config = { diff --git a/fast/stages/03-data-platform/dev/variables.tf b/fast/stages/03-data-platform/dev/variables.tf index 1f65cf77..3b1645e4 100644 --- a/fast/stages/03-data-platform/dev/variables.tf +++ b/fast/stages/03-data-platform/dev/variables.tf @@ -36,6 +36,17 @@ variable "composer_config" { } } +variable "data_catalog_tags" { + description = "List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format." + type = map(map(list(string))) + nullable = false + default = { + "3_Confidential" = null + "2_Private" = null + "1_Sensitive" = null + } +} + variable "data_force_destroy" { description = "Flag to set 'force_destroy' on data services like BigQery or Cloud Storage." 
type = bool From 15ba9559c5ff61d9d377e4f75d79d252ae3edf20 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Fri, 1 Apr 2022 18:44:18 +0200 Subject: [PATCH 07/15] Update READMEs --- .../data-platform-foundations/README.md | 21 ++++++++------- fast/stages/03-data-platform/dev/README.md | 27 ++++++++++--------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/examples/data-solutions/data-platform-foundations/README.md b/examples/data-solutions/data-platform-foundations/README.md index a776a405..feaaa02b 100644 --- a/examples/data-solutions/data-platform-foundations/README.md +++ b/examples/data-solutions/data-platform-foundations/README.md @@ -238,17 +238,18 @@ Description of commands: | name | description | type | required | default | |---|---|:---:|:---:|:---:| | [billing_account_id](variables.tf#L17) | Billing account id. | string | ✓ | | -| [folder_id](variables.tf#L42) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ | | -| [organization_domain](variables.tf#L87) | Organization domain. | string | ✓ | | -| [prefix](variables.tf#L92) | Unique prefix used for resource names. | string | ✓ | | +| [folder_id](variables.tf#L53) | Folder to be used for the networking resources in folders/nnnn format. | string | ✓ | | +| [organization_domain](variables.tf#L98) | Organization domain. | string | ✓ | | +| [prefix](variables.tf#L103) | Unique prefix used for resource names. | string | ✓ | | | [composer_config](variables.tf#L22) | Cloud Composer config. | object({…}) | | {…} | -| [data_force_destroy](variables.tf#L36) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | -| [groups](variables.tf#L53) | User groups. | map(string) | | {…} | -| [location](variables.tf#L47) | Location used for multi-regional resources. | string | | "eu" | -| [network_config](variables.tf#L63) | Shared VPC network configurations to use. 
If null networks will be created in projects with preconfigured values. | object({…}) | | null | -| [project_services](variables.tf#L97) | List of core services enabled on all projects. | list(string) | | […] | -| [project_suffix](variables.tf#L108) | Suffix used only for project ids. | string | | null | -| [region](variables.tf#L114) | Region used for regional resources. | string | | "europe-west1" | +| [data_catalog_tags](variables.tf#L36) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) | | {…} | +| [data_force_destroy](variables.tf#L47) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | +| [groups](variables.tf#L64) | User groups. | map(string) | | {…} | +| [location](variables.tf#L58) | Location used for multi-regional resources. | string | | "eu" | +| [network_config](variables.tf#L74) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | +| [project_services](variables.tf#L108) | List of core services enabled on all projects. | list(string) | | […] | +| [project_suffix](variables.tf#L119) | Suffix used only for project ids. | string | | null | +| [region](variables.tf#L125) | Region used for regional resources. | string | | "europe-west1" | ## Outputs diff --git a/fast/stages/03-data-platform/dev/README.md b/fast/stages/03-data-platform/dev/README.md index 57010a9a..1d833a2a 100644 --- a/fast/stages/03-data-platform/dev/README.md +++ b/fast/stages/03-data-platform/dev/README.md @@ -145,20 +145,21 @@ terraform apply | name | description | type | required | default | producer | |---|---|:---:|:---:|:---:|:---:| | [billing_account](variables.tf#L17) | Billing account id and organization id ('nnnnnnnn' or null). 
| object({…}) | ✓ | | 00-globals | -| [folder_ids](variables.tf#L45) | Folder to be used for the networking resources in folders/nnnn format. | object({…}) | ✓ | | 01-resman | -| [host_project_ids](variables.tf#L63) | Shared VPC project ids. | object({…}) | ✓ | | 02-networking | -| [organization](variables.tf#L89) | Organization details. | object({…}) | ✓ | | 00-globals | -| [prefix](variables.tf#L105) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | string | ✓ | | 00-globals | +| [folder_ids](variables.tf#L56) | Folder to be used for the networking resources in folders/nnnn format. | object({…}) | ✓ | | 01-resman | +| [host_project_ids](variables.tf#L74) | Shared VPC project ids. | object({…}) | ✓ | | 02-networking | +| [organization](variables.tf#L100) | Organization details. | object({…}) | ✓ | | 00-globals | +| [prefix](variables.tf#L116) | Unique prefix used for resource names. Not used for projects if 'project_create' is null. | string | ✓ | | 00-globals | | [composer_config](variables.tf#L26) | | object({…}) | | {…} | | -| [data_force_destroy](variables.tf#L39) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | bool | | false | | -| [groups](variables.tf#L53) | Groups. | map(string) | | {…} | | -| [network_config_composer](variables.tf#L71) | Network configurations to use for Composer. | object({…}) | | {…} | | -| [outputs_location](variables.tf#L99) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string | | null | | -| [project_services](variables.tf#L111) | List of core services enabled on all projects. | list(string) | | […] | | -| [region](variables.tf#L122) | Region used for regional resources. | string | | "europe-west1" | | -| [service_encryption_keys](variables.tf#L128) | Cloud KMS to use to encrypt different services. Key location should match service region. 
| object({…}) | | null | | -| [subnet_self_links](variables.tf#L140) | Shared VPC subnet self links. | object({…}) | | null | 02-networking | -| [vpc_self_links](variables.tf#L149) | Shared VPC self links. | object({…}) | | null | 02-networking | +| [data_catalog_tags](variables.tf#L39) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) | | {…} | | +| [data_force_destroy](variables.tf#L50) | Flag to set 'force_destroy' on data services like BigQery or Cloud Storage. | bool | | false | | +| [groups](variables.tf#L64) | Groups. | map(string) | | {…} | | +| [network_config_composer](variables.tf#L82) | Network configurations to use for Composer. | object({…}) | | {…} | | +| [outputs_location](variables.tf#L110) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string | | null | | +| [project_services](variables.tf#L122) | List of core services enabled on all projects. | list(string) | | […] | | +| [region](variables.tf#L133) | Region used for regional resources. | string | | "europe-west1" | | +| [service_encryption_keys](variables.tf#L139) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | | +| [subnet_self_links](variables.tf#L151) | Shared VPC subnet self links. | object({…}) | | null | 02-networking | +| [vpc_self_links](variables.tf#L160) | Shared VPC self links. 
| object({…}) | | null | 02-networking | ## Outputs From 98a9ba2c8250058383ef452b1374040431d9f619 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Fri, 1 Apr 2022 18:46:51 +0200 Subject: [PATCH 08/15] Fix example test --- .../data_solutions/data_platform_foundations/test_plan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/examples/data_solutions/data_platform_foundations/test_plan.py b/tests/examples/data_solutions/data_platform_foundations/test_plan.py index 4857bf9f..01eac0a3 100644 --- a/tests/examples/data_solutions/data_platform_foundations/test_plan.py +++ b/tests/examples/data_solutions/data_platform_foundations/test_plan.py @@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture') def test_resources(e2e_plan_runner): "Test that plan works and the numbers of resources is as expected." modules, resources = e2e_plan_runner(FIXTURES_DIR) - assert len(modules) == 40 + assert len(modules) == 41 assert len(resources) == 296 From 3d8c9a4b529fdffff1c351af3747a29468e59364 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Fri, 1 Apr 2022 18:52:30 +0200 Subject: [PATCH 09/15] Fix test. --- .../data_solutions/data_platform_foundations/test_plan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/examples/data_solutions/data_platform_foundations/test_plan.py b/tests/examples/data_solutions/data_platform_foundations/test_plan.py index 01eac0a3..e5db6ffc 100644 --- a/tests/examples/data_solutions/data_platform_foundations/test_plan.py +++ b/tests/examples/data_solutions/data_platform_foundations/test_plan.py @@ -24,4 +24,4 @@ def test_resources(e2e_plan_runner): "Test that plan works and the numbers of resources is as expected." 
modules, resources = e2e_plan_runner(FIXTURES_DIR) assert len(modules) == 41 - assert len(resources) == 296 + assert len(resources) == 313 From d5bdc2a7e0e07ac0ba5a3286d36c47c3d8b756a6 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Sun, 3 Apr 2022 14:59:00 +0200 Subject: [PATCH 10/15] Update IAM.md --- .../data-platform-foundations/IAM.md | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/examples/data-solutions/data-platform-foundations/IAM.md b/examples/data-solutions/data-platform-foundations/IAM.md index aed1c405..e39bb8b2 100644 --- a/examples/data-solutions/data-platform-foundations/IAM.md +++ b/examples/data-solutions/data-platform-foundations/IAM.md @@ -6,45 +6,53 @@ Legend: + additive, conditional. | members | roles | |---|---| +|gcp-data-analysts
group|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) | |gcp-data-engineers
group|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin)
[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | -|gcp-data-security
group|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) | -|load-df-0
serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | -|trf-df-0
serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | +|gcp-data-security
group|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin)
[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) | +|load-df-0
serviceAccount|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | +|trf-bq-0
serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) | +|trf-df-0
serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | ## Project dtl-0 | members | roles | |---|---| -|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|load-df-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | -|trf-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | -|trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | +|load-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | +|service-390266833555
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| +|trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | +|trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) | ## Project dtl-1 | members | roles | |---|---| -|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|trf-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | -|trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | +|service-914571197251
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| +|trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | +|trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | ## Project dtl-2 | members | roles | |---|---| -|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|trf-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | -|trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | +|service-272101441067
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| +|trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | +|trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | ## Project dtl-plg | members | roles | |---|---| -|gcp-data-analysts
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|gcp-data-analysts
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|service-185415295897
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| ## Project lnd @@ -64,14 +72,16 @@ Legend: + additive, conditional. |gcp-data-engineers
group|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)
[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer)
[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) | |load-df-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)
[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |orc-cmp-0
serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | +|service-1027982570085
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin)
[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| ## Project orc | members | roles | |---|---| -|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor)
[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)
[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)
[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor)
[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)
[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |load-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |orc-cmp-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|service-466251568699
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin)
[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | ## Project trf @@ -80,5 +90,6 @@ Legend: + additive, conditional. |---|---| |gcp-data-engineers
group|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | |orc-cmp-0
serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | +|service-838656561422
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin)
[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |trf-bq-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |trf-df-0
serviceAccount|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | From e008fde9bb982311a87bdd66d6a4ae1f52239216 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Sun, 3 Apr 2022 22:03:35 +0200 Subject: [PATCH 11/15] Update READMEs and separate demo pipelines --- .../data-platform-foundations/README.md | 13 +- .../data-platform-foundations/demo/README.md | 31 +- .../demo/datapipeline.py | 114 +------ .../demo/datapipeline_dc_tags.py | 322 ++++++++++++++++++ .../demo/delete_table.py | 146 ++++++++ fast/stages/03-data-platform/dev/README.md | 6 + 6 files changed, 510 insertions(+), 122 deletions(-) create mode 100644 examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py create mode 100644 examples/data-solutions/data-platform-foundations/demo/delete_table.py diff --git a/examples/data-solutions/data-platform-foundations/README.md b/examples/data-solutions/data-platform-foundations/README.md index feaaa02b..99100f17 100644 --- a/examples/data-solutions/data-platform-foundations/README.md +++ b/examples/data-solutions/data-platform-foundations/README.md @@ -220,17 +220,10 @@ To do this, you need to remove IAM binging at project-level for the `data-analys ## Demo pipeline -The application layer is out of scope of this script, but as a demo, it is provided with a Cloud Composer DAG to mode data from the `landing` area to the `DataLake L2` dataset. +The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features. -Just follow the commands you find in the `demo_commands` Terraform output, go in the Cloud Composer UI and run the `data_pipeline_dag`. +You can find examples in the `[demo](./demo)` folder. -Description of commands: - -- 01: copy sample data to a `landing` Cloud Storage bucket impersonating the `load` service account. 
-- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account. -- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account. -- 04: Open the Cloud Composer Airflow UI and run the imported DAG. -- 05: Run the BigQuery query to see results. ## Variables @@ -268,8 +261,6 @@ Description of commands: Features to add in future releases: -- Add support for Column level access on BigQuery -- Add example templates for Data Catalog - Add example on how to use Cloud Data Loss Prevention - Add solution to handle Tables, Views, and Authorized Views lifecycle - Add solution to handle Metadata lifecycle diff --git a/examples/data-solutions/data-platform-foundations/demo/README.md b/examples/data-solutions/data-platform-foundations/demo/README.md index 2cac9a9a..5347b2cf 100644 --- a/examples/data-solutions/data-platform-foundations/demo/README.md +++ b/examples/data-solutions/data-platform-foundations/demo/README.md @@ -1,3 +1,32 @@ # Data ingestion Demo -In this folder you can find an example to ingest data on the `data platform` instantiated in [here](../). See details in the [README.m](../#demo-pipeline) to run the demo. \ No newline at end of file +In this folder, you can find an example to ingest data on the `data platform` instantiated [here](../). + +The example is not intended to be a production-ready code. + +## Demo use case +The demo imports purchase data generated by a store. + +## Input files +Data are uploaded to the `landing` GCS bucket. 
File structure:
 - `customers.csv`: Comma-separated values with customer information in the following format: Customer ID, Name, Surname, Registration Timestamp
 - `purchases.csv`: Comma-separated values with purchase information in the following format: Item ID, Customer ID, Item, Item price, Purchase Timestamp

## Data processing pipelines
Different data pipelines are provided to highlight different features and patterns. For the purpose of the example, a single pipeline handles all data lifecycles. When adapting them to your real use case, you may want to evaluate the option to handle each functional step on a separate pipeline or a dedicated tool. For example, you may want to use `Dataform` to handle the data schema lifecycle.

Below you can find a description of each example:
 - Simple import data: [`datapipeline.py`](./datapipeline.py) is a simple pipeline to import provided data from the `landing` Google Cloud Storage bucket to the Data Hub L2 layer joining `customers` and `purchases` tables into the `customerpurchase` table.
 - Import data with Policy Tags: [`datapipeline_dc_tags.py`](./datapipeline_dc_tags.py) imports provided data from the `landing` bucket to the Data Hub L2 layer protecting sensitive data using Data Catalog policy Tags.
 - Delete tables: [`delete_table.py`](./delete_table.py) deletes BigQuery tables created by import pipelines.

## Running the demo
To run the demo examples, please follow these steps:

- 01: copy sample data to the `landing` Cloud Storage bucket impersonating the `load` service account.
- 02: copy sample data structure definition in the `orchestration` Cloud Storage bucket impersonating the `orchestration` service account.
- 03: copy the Cloud Composer DAG to the Cloud Composer Storage bucket impersonating the `orchestration` service account.
- 04: Open the Cloud Composer Airflow UI and run the imported DAG.
- 05: Run the BigQuery query to see results.
+ +You can find pre-computed commands in the `demo_commands` output variable of the deployed terraform [data pipeline](../). diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py index 6a1a06be..1f748c08 100644 --- a/examples/data-solutions/data-platform-foundations/demo/datapipeline.py +++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline.py @@ -116,115 +116,8 @@ with models.DAG( trigger_rule='all_success' ) - with TaskGroup('upsert_table') as upsert_table: - upsert_table_customers = BigQueryUpsertTableOperator( - task_id="upsert_table_customers", - project_id=DTL_L0_PRJ, - dataset_id=DTL_L0_BQ_DATASET, - impersonation_chain=[TRF_SA_DF], - table_resource={ - "tableReference": {"tableId": "customers"}, - }, - ) - - upsert_table_purchases = BigQueryUpsertTableOperator( - task_id="upsert_table_purchases", - project_id=DTL_L0_PRJ, - dataset_id=DTL_L0_BQ_DATASET, - impersonation_chain=[TRF_SA_BQ], - table_resource={ - "tableReference": {"tableId": "purchases"} - }, - ) - - upsert_table_customer_purchase_l1 = BigQueryUpsertTableOperator( - task_id="upsert_table_customer_purchase_l1", - project_id=DTL_L1_PRJ, - dataset_id=DTL_L1_BQ_DATASET, - impersonation_chain=[TRF_SA_BQ], - table_resource={ - "tableReference": {"tableId": "customer_purchase"} - }, - ) - - upsert_table_customer_purchase_l2 = BigQueryUpsertTableOperator( - task_id="upsert_table_customer_purchase_l2", - project_id=DTL_L2_PRJ, - dataset_id=DTL_L2_BQ_DATASET, - impersonation_chain=[TRF_SA_BQ], - table_resource={ - "tableReference": {"tableId": "customer_purchase"} - }, - ) - - with TaskGroup('update_schema_table') as update_schema_table: - update_table_schema_customers = BigQueryUpdateTableSchemaOperator( - task_id="update_table_schema_customers", - project_id=DTL_L0_PRJ, - dataset_id=DTL_L0_BQ_DATASET, - table_id="customers", - impersonation_chain=[TRF_SA_BQ], - 
include_policy_tags=True, - schema_fields_updates=[ - { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}}, - { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} }, - { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } - ] - ) - - update_table_schema_customers = BigQueryUpdateTableSchemaOperator( - task_id="update_table_schema_purchases", - project_id=DTL_L0_PRJ, - dataset_id=DTL_L0_BQ_DATASET, - table_id="purchases", - impersonation_chain=[TRF_SA_BQ], - include_policy_tags=True, - schema_fields_updates=[ - { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, - { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, - { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } - ] - ) - - update_table_schema_customer_purchase_l1 = BigQueryUpdateTableSchemaOperator( - task_id="update_table_schema_customer_purchase_l1", - project_id=DTL_L1_PRJ, - dataset_id=DTL_L1_BQ_DATASET, - table_id="customer_purchase", - impersonation_chain=[TRF_SA_BQ], - include_policy_tags=True, - schema_fields_updates=[ - { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}}, - { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", 
"policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} }, - { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, - { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, - { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } - ] - ) - - update_table_schema_customer_purchase_l2 = BigQueryUpdateTableSchemaOperator( - task_id="update_table_schema_customer_purchase_l2", - project_id=DTL_L2_PRJ, - dataset_id=DTL_L2_BQ_DATASET, - table_id="customer_purchase", - impersonation_chain=[TRF_SA_BQ], - include_policy_tags=True, - schema_fields_updates=[ - { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" }, - { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}}, - { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} }, - { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, - { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, - { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } - ] - ) - + # Bigquery Tables automatically created for demo porpuse. + # Consider a dedicated pipeline or tool for a real life scenario. 
customers_import = DataflowTemplatedJobStartOperator( task_id="dataflow_customers_import", template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery", @@ -315,4 +208,5 @@ with models.DAG( }, impersonation_chain=[TRF_SA_BQ] ) - start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end + + start >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end \ No newline at end of file diff --git a/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py b/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py new file mode 100644 index 00000000..2fb88c9e --- /dev/null +++ b/examples/data-solutions/data-platform-foundations/demo/datapipeline_dc_tags.py @@ -0,0 +1,322 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -------------------------------------------------------------------------------- +# Load The Dependencies +# -------------------------------------------------------------------------------- + +import csv +import datetime +import io +import json +import logging +import os + +from airflow import models +from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator +from airflow.operators import dummy +from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator, BigQueryUpsertTableOperator, BigQueryUpdateTableSchemaOperator +from airflow.utils.task_group import TaskGroup + +# -------------------------------------------------------------------------------- +# Set variables - Needed for the DEMO +# -------------------------------------------------------------------------------- +BQ_LOCATION = os.environ.get("BQ_LOCATION") +DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS")) +DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ") +DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET") +DTL_L0_GCS = os.environ.get("DTL_L0_GCS") +DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ") +DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET") +DTL_L1_GCS = os.environ.get("DTL_L1_GCS") +DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ") +DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET") +DTL_L2_GCS = os.environ.get("DTL_L2_GCS") +DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ") +DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET") +DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS") +GCP_REGION = os.environ.get("GCP_REGION") +LND_PRJ = os.environ.get("LND_PRJ") +LND_BQ = os.environ.get("LND_BQ") +LND_GCS = os.environ.get("LND_GCS") +LND_PS = os.environ.get("LND_PS") +LOD_PRJ = os.environ.get("LOD_PRJ") +LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING") +LOD_NET_VPC = os.environ.get("LOD_NET_VPC") +LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET") +LOD_SA_DF = os.environ.get("LOD_SA_DF") +ORC_PRJ = os.environ.get("ORC_PRJ") +ORC_GCS = 
os.environ.get("ORC_GCS") +TRF_PRJ = os.environ.get("TRF_PRJ") +TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING") +TRF_NET_VPC = os.environ.get("TRF_NET_VPC") +TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET") +TRF_SA_DF = os.environ.get("TRF_SA_DF") +TRF_SA_BQ = os.environ.get("TRF_SA_BQ") +DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "") +DF_REGION = os.environ.get("GCP_REGION") +DF_ZONE = os.environ.get("GCP_REGION") + "-b" + +# -------------------------------------------------------------------------------- +# Set default arguments +# -------------------------------------------------------------------------------- + +# If you are running Airflow in more than one time zone +# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html +# for best practices +yesterday = datetime.datetime.now() - datetime.timedelta(days=1) + +default_args = { + 'owner': 'airflow', + 'start_date': yesterday, + 'depends_on_past': False, + 'email': [''], + 'email_on_failure': False, + 'email_on_retry': False, + 'retries': 1, + 'retry_delay': datetime.timedelta(minutes=5), + 'dataflow_default_options': { + 'location': DF_REGION, + 'zone': DF_ZONE, + 'stagingLocation': LOD_GCS_STAGING, + 'tempLocation': LOD_GCS_STAGING + "/tmp", + 'serviceAccountEmail': LOD_SA_DF, + 'subnetwork': LOD_NET_SUBNET, + 'ipConfiguration': "WORKER_IP_PRIVATE", + 'kmsKeyName' : DF_KMS_KEY + }, +} + +# -------------------------------------------------------------------------------- +# Main DAG +# -------------------------------------------------------------------------------- + +with models.DAG( + 'data_pipeline_dc_tags_dag', + default_args=default_args, + schedule_interval=None) as dag: + start = dummy.DummyOperator( + task_id='start', + trigger_rule='all_success' + ) + + end = dummy.DummyOperator( + task_id='end', + trigger_rule='all_success' + ) + + # Bigquery Tables created here for demo porpuse. + # Consider a dedicated pipeline or tool for a real life scenario. 
+ with TaskGroup('upsert_table') as upsert_table: + upsert_table_customers = BigQueryUpsertTableOperator( + task_id="upsert_table_customers", + project_id=DTL_L0_PRJ, + dataset_id=DTL_L0_BQ_DATASET, + impersonation_chain=[TRF_SA_DF], + table_resource={ + "tableReference": {"tableId": "customers"}, + }, + ) + + upsert_table_purchases = BigQueryUpsertTableOperator( + task_id="upsert_table_purchases", + project_id=DTL_L0_PRJ, + dataset_id=DTL_L0_BQ_DATASET, + impersonation_chain=[TRF_SA_BQ], + table_resource={ + "tableReference": {"tableId": "purchases"} + }, + ) + + upsert_table_customer_purchase_l1 = BigQueryUpsertTableOperator( + task_id="upsert_table_customer_purchase_l1", + project_id=DTL_L1_PRJ, + dataset_id=DTL_L1_BQ_DATASET, + impersonation_chain=[TRF_SA_BQ], + table_resource={ + "tableReference": {"tableId": "customer_purchase"} + }, + ) + + upsert_table_customer_purchase_l2 = BigQueryUpsertTableOperator( + task_id="upsert_table_customer_purchase_l2", + project_id=DTL_L2_PRJ, + dataset_id=DTL_L2_BQ_DATASET, + impersonation_chain=[TRF_SA_BQ], + table_resource={ + "tableReference": {"tableId": "customer_purchase"} + }, + ) + + # Bigquery Tables schema defined here for demo porpuse. + # Consider a dedicated pipeline or tool for a real life scenario. 
+ with TaskGroup('update_schema_table') as update_schema_table: + update_table_schema_customers = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_customers", + project_id=DTL_L0_PRJ, + dataset_id=DTL_L0_BQ_DATASET, + table_id="customers", + impersonation_chain=[TRF_SA_BQ], + include_policy_tags=True, + schema_fields_updates=[ + { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}}, + { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + ) + + update_table_schema_customers = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_purchases", + project_id=DTL_L0_PRJ, + dataset_id=DTL_L0_BQ_DATASET, + table_id="purchases", + impersonation_chain=[TRF_SA_BQ], + include_policy_tags=True, + schema_fields_updates=[ + { "mode": "REQUIRED", "name": "id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, + { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + ) + + update_table_schema_customer_purchase_l1 = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_customer_purchase_l1", + project_id=DTL_L1_PRJ, + dataset_id=DTL_L1_BQ_DATASET, + table_id="customer_purchase", + impersonation_chain=[TRF_SA_BQ], + include_policy_tags=True, + schema_fields_updates=[ + { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": 
"purchase_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}}, + { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} }, + { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, + { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + ) + + update_table_schema_customer_purchase_l2 = BigQueryUpdateTableSchemaOperator( + task_id="update_table_schema_customer_purchase_l2", + project_id=DTL_L2_PRJ, + dataset_id=DTL_L2_BQ_DATASET, + table_id="customer_purchase", + impersonation_chain=[TRF_SA_BQ], + include_policy_tags=True, + schema_fields_updates=[ + { "mode": "REQUIRED", "name": "customer_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "purchase_id", "type": "INTEGER", "description": "ID" }, + { "mode": "REQUIRED", "name": "name", "type": "STRING", "description": "Name", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]}}, + { "mode": "REQUIRED", "name": "surname", "type": "STRING", "description": "Surname", "policyTags": { "names": [DATA_CAT_TAGS.get('2_Private', None)]} }, + { "mode": "REQUIRED", "name": "item", "type": "STRING", "description": "Item Name" }, + { "mode": "REQUIRED", "name": "price", "type": "FLOAT", "description": "Item Price" }, + { "mode": "REQUIRED", "name": "timestamp", "type": "TIMESTAMP", "description": "Timestamp" } + ] + ) + + customers_import = DataflowTemplatedJobStartOperator( + task_id="dataflow_customers_import", + template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery", + project_id=LOD_PRJ, + location=DF_REGION, + parameters={ + "javascriptTextTransformFunctionName": "transform", + 
"JSONPath": ORC_GCS + "/customers_schema.json", + "javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js", + "inputFilePattern": LND_GCS + "/customers.csv", + "outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".customers", + "bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/", + }, + ) + + purchases_import = DataflowTemplatedJobStartOperator( + task_id="dataflow_purchases_import", + template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery", + project_id=LOD_PRJ, + location=DF_REGION, + parameters={ + "javascriptTextTransformFunctionName": "transform", + "JSONPath": ORC_GCS + "/purchases_schema.json", + "javascriptTextTransformGcsPath": ORC_GCS + "/purchases_udf.js", + "inputFilePattern": LND_GCS + "/purchases.csv", + "outputTable": DTL_L0_PRJ + ":"+DTL_L0_BQ_DATASET+".purchases", + "bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/", + }, + ) + + join_customer_purchase = BigQueryInsertJobOperator( + task_id='bq_join_customer_purchase', + gcp_conn_id='bigquery_default', + project_id=TRF_PRJ, + location=BQ_LOCATION, + configuration={ + 'jobType':'QUERY', + 'query':{ + 'query':"""SELECT + c.id as customer_id, + p.id as purchase_id, + c.name as name, + c.surname as surname, + p.item as item, + p.price as price, + p.timestamp as timestamp + FROM `{dtl_0_prj}.{dtl_0_dataset}.customers` c + JOIN `{dtl_0_prj}.{dtl_0_dataset}.purchases` p ON c.id = p.customer_id + """.format(dtl_0_prj=DTL_L0_PRJ, dtl_0_dataset=DTL_L0_BQ_DATASET, ), + 'destinationTable':{ + 'projectId': DTL_L1_PRJ, + 'datasetId': DTL_L1_BQ_DATASET, + 'tableId': 'customer_purchase' + }, + 'writeDisposition':'WRITE_TRUNCATE', + "useLegacySql": False + } + }, + impersonation_chain=[TRF_SA_BQ] + ) + + l2_customer_purchase = BigQueryInsertJobOperator( + task_id='bq_l2_customer_purchase', + gcp_conn_id='bigquery_default', + project_id=TRF_PRJ, + location=BQ_LOCATION, + configuration={ + 'jobType':'QUERY', + 'query':{ + 'query':"""SELECT + customer_id, + purchase_id, + name, 
+ surname, + item, + price, + timestamp + FROM `{dtl_1_prj}.{dtl_1_dataset}.customer_purchase` + """.format(dtl_1_prj=DTL_L1_PRJ, dtl_1_dataset=DTL_L1_BQ_DATASET, ), + 'destinationTable':{ + 'projectId': DTL_L2_PRJ, + 'datasetId': DTL_L2_BQ_DATASET, + 'tableId': 'customer_purchase' + }, + 'writeDisposition':'WRITE_TRUNCATE', + "useLegacySql": False + } + }, + impersonation_chain=[TRF_SA_BQ] + ) + start >> upsert_table >> update_schema_table >> [customers_import, purchases_import] >> join_customer_purchase >> l2_customer_purchase >> end diff --git a/examples/data-solutions/data-platform-foundations/demo/delete_table.py b/examples/data-solutions/data-platform-foundations/demo/delete_table.py new file mode 100644 index 00000000..a2585a68 --- /dev/null +++ b/examples/data-solutions/data-platform-foundations/demo/delete_table.py @@ -0,0 +1,146 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -------------------------------------------------------------------------------- +# Load The Dependencies +# -------------------------------------------------------------------------------- + +import csv +import datetime +import io +import json +import logging +import os + +from airflow import models +from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator +from airflow.operators import dummy +from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteTableOperator +from airflow.utils.task_group import TaskGroup + +# -------------------------------------------------------------------------------- +# Set variables - Needed for the DEMO +# -------------------------------------------------------------------------------- +BQ_LOCATION = os.environ.get("BQ_LOCATION") +DATA_CAT_TAGS = json.loads(os.environ.get("DATA_CAT_TAGS")) +DTL_L0_PRJ = os.environ.get("DTL_L0_PRJ") +DTL_L0_BQ_DATASET = os.environ.get("DTL_L0_BQ_DATASET") +DTL_L0_GCS = os.environ.get("DTL_L0_GCS") +DTL_L1_PRJ = os.environ.get("DTL_L1_PRJ") +DTL_L1_BQ_DATASET = os.environ.get("DTL_L1_BQ_DATASET") +DTL_L1_GCS = os.environ.get("DTL_L1_GCS") +DTL_L2_PRJ = os.environ.get("DTL_L2_PRJ") +DTL_L2_BQ_DATASET = os.environ.get("DTL_L2_BQ_DATASET") +DTL_L2_GCS = os.environ.get("DTL_L2_GCS") +DTL_PLG_PRJ = os.environ.get("DTL_PLG_PRJ") +DTL_PLG_BQ_DATASET = os.environ.get("DTL_PLG_BQ_DATASET") +DTL_PLG_GCS = os.environ.get("DTL_PLG_GCS") +GCP_REGION = os.environ.get("GCP_REGION") +LND_PRJ = os.environ.get("LND_PRJ") +LND_BQ = os.environ.get("LND_BQ") +LND_GCS = os.environ.get("LND_GCS") +LND_PS = os.environ.get("LND_PS") +LOD_PRJ = os.environ.get("LOD_PRJ") +LOD_GCS_STAGING = os.environ.get("LOD_GCS_STAGING") +LOD_NET_VPC = os.environ.get("LOD_NET_VPC") +LOD_NET_SUBNET = os.environ.get("LOD_NET_SUBNET") +LOD_SA_DF = os.environ.get("LOD_SA_DF") +ORC_PRJ = os.environ.get("ORC_PRJ") +ORC_GCS = os.environ.get("ORC_GCS") +TRF_PRJ = os.environ.get("TRF_PRJ") 
+TRF_GCS_STAGING = os.environ.get("TRF_GCS_STAGING") +TRF_NET_VPC = os.environ.get("TRF_NET_VPC") +TRF_NET_SUBNET = os.environ.get("TRF_NET_SUBNET") +TRF_SA_DF = os.environ.get("TRF_SA_DF") +TRF_SA_BQ = os.environ.get("TRF_SA_BQ") +DF_KMS_KEY = os.environ.get("DF_KMS_KEY", "") +DF_REGION = os.environ.get("GCP_REGION") +DF_ZONE = os.environ.get("GCP_REGION") + "-b" + +# -------------------------------------------------------------------------------- +# Set default arguments +# -------------------------------------------------------------------------------- + +# If you are running Airflow in more than one time zone +# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html +# for best practices +yesterday = datetime.datetime.now() - datetime.timedelta(days=1) + +default_args = { + 'owner': 'airflow', + 'start_date': yesterday, + 'depends_on_past': False, + 'email': [''], + 'email_on_failure': False, + 'email_on_retry': False, + 'retries': 1, + 'retry_delay': datetime.timedelta(minutes=5), + 'dataflow_default_options': { + 'location': DF_REGION, + 'zone': DF_ZONE, + 'stagingLocation': LOD_GCS_STAGING, + 'tempLocation': LOD_GCS_STAGING + "/tmp", + 'serviceAccountEmail': LOD_SA_DF, + 'subnetwork': LOD_NET_SUBNET, + 'ipConfiguration': "WORKER_IP_PRIVATE", + 'kmsKeyName' : DF_KMS_KEY + }, +} + +# -------------------------------------------------------------------------------- +# Main DAG +# -------------------------------------------------------------------------------- + +with models.DAG( + 'delete_tables_dag', + default_args=default_args, + schedule_interval=None) as dag: + start = dummy.DummyOperator( + task_id='start', + trigger_rule='all_success' + ) + + end = dummy.DummyOperator( + task_id='end', + trigger_rule='all_success' + ) + + # Bigquery Tables deleted here for demo porpuse. + # Consider a dedicated pipeline or tool for a real life scenario. 
+ with TaskGroup('delete_table') as delte_table: + delete_table_customers = BigQueryDeleteTableOperator( + task_id="delete_table_customers", + deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".customers", + impersonation_chain=[TRF_SA_DF] + ) + + delete_table_purchases = BigQueryDeleteTableOperator( + task_id="delete_table_purchases", + deletion_dataset_table=DTL_L0_PRJ+"."+DTL_L0_BQ_DATASET+".purchases", + impersonation_chain=[TRF_SA_DF] + ) + + delete_table_customer_purchase_l1 = BigQueryDeleteTableOperator( + task_id="delete_table_customer_purchase_l1", + deletion_dataset_table=DTL_L1_PRJ+"."+DTL_L1_BQ_DATASET+".customer_purchase", + impersonation_chain=[TRF_SA_DF] + ) + + delete_table_customer_purchase_l2 = BigQueryDeleteTableOperator( + task_id="delete_table_customer_purchase_l2", + deletion_dataset_table=DTL_L2_PRJ+"."+DTL_L2_BQ_DATASET+".customer_purchase", + impersonation_chain=[TRF_SA_DF] + ) + + start >> delte_table >> end diff --git a/fast/stages/03-data-platform/dev/README.md b/fast/stages/03-data-platform/dev/README.md index 1d833a2a..5ecc2ad5 100644 --- a/fast/stages/03-data-platform/dev/README.md +++ b/fast/stages/03-data-platform/dev/README.md @@ -129,6 +129,12 @@ terraform init terraform apply ``` +## Demo pipeline + +The application layer is out of scope of this script. As a demo purpuse only, several Cloud Composer DAGs are provided. Demos will import data from the `landing` area to the `DataLake L2` dataset suing different features. + +You can find examples in the `[demo](../../../../examples/data-solutions/data-platform-foundations/demo)` folder. + From 6f00caaaabfe3e79daa0c215e4631a564fa296cb Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Mon, 4 Apr 2022 00:38:34 +0200 Subject: [PATCH 12/15] Handle Service Identity SA. 
Update IAM.md --- .../data-platform-foundations/IAM.md | 17 +++--- fast/stages/03-data-platform/dev/IAM.md | 53 +++++++++++-------- tools/state_iam.py | 16 +++++- 3 files changed, 56 insertions(+), 30 deletions(-) diff --git a/examples/data-solutions/data-platform-foundations/IAM.md b/examples/data-solutions/data-platform-foundations/IAM.md index e39bb8b2..d6ccbecb 100644 --- a/examples/data-solutions/data-platform-foundations/IAM.md +++ b/examples/data-solutions/data-platform-foundations/IAM.md @@ -19,8 +19,8 @@ Legend: + additive, conditional. |---|---| |gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |load-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | -|service-390266833555
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | |trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) | @@ -30,8 +30,8 @@ Legend: + additive, conditional. |---|---| |gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | -|service-914571197251
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | @@ -41,8 +41,8 @@ Legend: + additive, conditional. |---|---| |gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | -|service-272101441067
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | @@ -52,7 +52,7 @@ Legend: + additive, conditional. |---|---| |gcp-data-analysts
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|service-185415295897
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| ## Project lnd @@ -70,18 +70,20 @@ Legend: + additive, conditional. | members | roles | |---|---| |gcp-data-engineers
group|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)
[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer)
[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) | +|SERVICE_IDENTITY_dataflow-service-producer-prod
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |load-df-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)
[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |orc-cmp-0
serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | -|service-1027982570085
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin)
[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| ## Project orc | members | roles | |---|---| |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor)
[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)
[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_cloudcomposer-accounts
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |load-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |orc-cmp-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | -|service-466251568699
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin)
[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | ## Project trf @@ -89,7 +91,8 @@ Legend: + additive, conditional. | members | roles | |---|---| |gcp-data-engineers
group|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | +|SERVICE_IDENTITY_dataflow-service-producer-prod
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |orc-cmp-0
serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | -|service-838656561422
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin)
[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |trf-bq-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |trf-df-0
serviceAccount|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | diff --git a/fast/stages/03-data-platform/dev/IAM.md b/fast/stages/03-data-platform/dev/IAM.md index 2fa6fbd9..70622c2e 100644 --- a/fast/stages/03-data-platform/dev/IAM.md +++ b/fast/stages/03-data-platform/dev/IAM.md @@ -6,45 +6,53 @@ Legend: + additive, conditional. | members | roles | |---|---| +|gcp-data-analysts
group|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) | |gcp-data-engineers
group|[roles/dlp.estimatesAdmin](https://cloud.google.com/iam/docs/understanding-roles#dlp.estimatesAdmin)
[roles/dlp.reader](https://cloud.google.com/iam/docs/understanding-roles#dlp.reader)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | -|gcp-data-security
group|[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) | -|dev-data-load-df-0
serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | -|dev-data-trf-df-0
serviceAccount|[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | +|gcp-data-security
group|[roles/datacatalog.admin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.admin)
[roles/dlp.admin](https://cloud.google.com/iam/docs/understanding-roles#dlp.admin) | +|dev-data-load-df-0
serviceAccount|[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | +|dev-data-trf-bq-0
serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer) | +|dev-data-trf-df-0
serviceAccount|[roles/datacatalog.categoryFineGrainedReader](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryFineGrainedReader)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/dlp.user](https://cloud.google.com/iam/docs/understanding-roles#dlp.user) | ## Project dev-data-dtl-0-0 | members | roles | |---|---| -|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|dev-data-load-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | -|dev-data-trf-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | -|dev-data-trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| +|dev-data-load-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) | +|dev-data-trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | +|dev-data-trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner) | ## Project dev-data-dtl-1-0 | members | roles | |---|---| -|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|dev-data-trf-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | -|dev-data-trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| +|dev-data-load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | +|dev-data-trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | +|dev-data-trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | ## Project dev-data-dtl-2-0 | members | roles | |---|---| -|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|gcp-data-analysts
group|[roles/bigquery.dataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataViewer)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | -|dev-data-trf-bq-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | -|dev-data-trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| +|dev-data-load-df-0
serviceAccount|[roles/datacatalog.categoryAdmin](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.categoryAdmin) | +|dev-data-trf-bq-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | +|dev-data-trf-df-0
serviceAccount|[roles/bigquery.dataOwner](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataOwner)
[roles/storage.objectCreator](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | ## Project dev-data-dtl-plg-0 | members | roles | |---|---| -|gcp-data-analysts
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|gcp-data-analysts
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/bigquery.metadataViewer](https://cloud.google.com/iam/docs/understanding-roles#bigquery.metadataViewer)
[roles/bigquery.user](https://cloud.google.com/iam/docs/understanding-roles#bigquery.user)
[roles/datacatalog.tagTemplateViewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.tagTemplateViewer)
[roles/datacatalog.viewer](https://cloud.google.com/iam/docs/understanding-roles#datacatalog.viewer)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| ## Project dev-data-lnd-0 @@ -62,37 +70,40 @@ Legend: + additive, conditional. | members | roles | |---|---| |gcp-data-engineers
group|[roles/compute.viewer](https://cloud.google.com/iam/docs/understanding-roles#compute.viewer)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)
[roles/dataflow.developer](https://cloud.google.com/iam/docs/understanding-roles#dataflow.developer)
[roles/viewer](https://cloud.google.com/iam/docs/understanding-roles#viewer) | +|SERVICE_IDENTITY_dataflow-service-producer-prod
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |dev-data-load-df-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin)
[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |dev-data-orc-cmp-0
serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | -|service-426128559612
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | ## Project dev-data-orc-0 | members | roles | |---|---| |gcp-data-engineers
group|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/cloudbuild.builds.editor](https://cloud.google.com/iam/docs/understanding-roles#cloudbuild.builds.editor)
[roles/composer.admin](https://cloud.google.com/iam/docs/understanding-roles#composer.admin)
[roles/composer.environmentAndStorageObjectAdmin](https://cloud.google.com/iam/docs/understanding-roles#composer.environmentAndStorageObjectAdmin)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/iap.httpsResourceAccessor](https://cloud.google.com/iam/docs/understanding-roles#iap.httpsResourceAccessor)
[roles/storage.admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_cloudcomposer-accounts
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |dev-data-load-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor)
[roles/storage.objectViewer](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) | |dev-data-orc-cmp-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/composer.worker](https://cloud.google.com/iam/docs/understanding-roles#composer.worker)
[roles/iam.serviceAccountUser](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | |dev-data-trf-df-0
serviceAccount|[roles/bigquery.dataEditor](https://cloud.google.com/iam/docs/understanding-roles#bigquery.dataEditor) | -|service-36960036774
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | ## Project dev-data-trf-0 | members | roles | |---|---| |gcp-data-engineers
group|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser)
[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | +|SERVICE_IDENTITY_dataflow-service-producer-prod
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | +|SERVICE_IDENTITY_service-networking
serviceAccount|[roles/servicenetworking.serviceAgent](https://cloud.google.com/iam/docs/understanding-roles#servicenetworking.serviceAgent) +| |dev-data-orc-cmp-0
serviceAccount|[roles/dataflow.admin](https://cloud.google.com/iam/docs/understanding-roles#dataflow.admin) | |dev-data-trf-bq-0
serviceAccount|[roles/bigquery.jobUser](https://cloud.google.com/iam/docs/understanding-roles#bigquery.jobUser) | |dev-data-trf-df-0
serviceAccount|[roles/dataflow.worker](https://cloud.google.com/iam/docs/understanding-roles#dataflow.worker)
[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | -|service-883871192228
serviceAccount|[roles/storage.objectAdmin](https://cloud.google.com/iam/docs/understanding-roles#storage.objectAdmin) | ## Project dev-net-spoke-0 | members | roles | |---|---| -|36960036774
serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +| +|PROJECT_CLOUD_SERVICES
serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +| +|SERVICE_IDENTITY_cloudcomposer-accounts
serviceAccount|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) +| +|SERVICE_IDENTITY_container-engine-robot
serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +
[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) +| +|SERVICE_IDENTITY_dataflow-service-producer-prod
serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +
[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +
[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +
[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) +| |dev-data-load-df-0
serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +| |dev-data-trf-df-0
serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +| -|service-36960036774
serviceAccount|[roles/composer.sharedVpcAgent](https://cloud.google.com/iam/docs/understanding-roles#composer.sharedVpcAgent) +
[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +
[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +
[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) +
[roles/container.hostServiceAgentUser](https://cloud.google.com/iam/docs/understanding-roles#container.hostServiceAgentUser) +| -|service-426128559612
serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +| -|service-883871192228
serviceAccount|[roles/compute.networkUser](https://cloud.google.com/iam/docs/understanding-roles#compute.networkUser) +| diff --git a/tools/state_iam.py b/tools/state_iam.py index 7817a7ee..547c94b4 100755 --- a/tools/state_iam.py +++ b/tools/state_iam.py @@ -65,10 +65,22 @@ def get_bindings(resources, prefix=None, folders=None): member_type, _, member_id = member.partition(':') if member_type == 'user': continue - member_id = member_id.rpartition('@')[0] + member_id,member_domain = member_id.split('@',1) + # Handle Cloud Services Service Account + if member_domain == 'cloudservices.gserviceaccount.com': + member_id = "PROJECT_CLOUD_SERVICES" + # Handle Cloud Service Identity Service Acocunt + if re.match("^service-\d{8}", member_id): + member_id = "SERVICE_IDENTITY_" + member_domain.split(".", 1)[0] + # Handle BQ Cloud Service Identity Service Acocunt + if re.match("^bq-\d{8}", member_id): + member_id = "IDENTITY_" + member_domain.split(".", 1)[0] + resource_type_output = "Service Identity - " + resource_type + else: + resource_type_output = resource_type if prefix and member_id.startswith(prefix): member_id = member_id[len(prefix) + 1:] - yield Binding(authoritative, resource_type, resource_id, role, + yield Binding(authoritative, resource_type_output, resource_id, role, member_type, member_id, conditions) From 9011e370b887cb4457775c312aa2624f20ecb6c6 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Mon, 4 Apr 2022 00:45:27 +0200 Subject: [PATCH 13/15] Fix linting --- tools/state_iam.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/state_iam.py b/tools/state_iam.py index 547c94b4..42f9f76e 100755 --- a/tools/state_iam.py +++ b/tools/state_iam.py @@ -65,7 +65,7 @@ def get_bindings(resources, prefix=None, folders=None): member_type, _, member_id = member.partition(':') if member_type == 'user': continue - member_id,member_domain = member_id.split('@',1) + member_id, member_domain = member_id.split('@', 1) # Handle Cloud Services 
Service Account if member_domain == 'cloudservices.gserviceaccount.com': member_id = "PROJECT_CLOUD_SERVICES" @@ -75,7 +75,7 @@ def get_bindings(resources, prefix=None, folders=None): # Handle BQ Cloud Service Identity Service Acocunt if re.match("^bq-\d{8}", member_id): member_id = "IDENTITY_" + member_domain.split(".", 1)[0] - resource_type_output = "Service Identity - " + resource_type + resource_type_output = "Service Identity - " + resource_type else: resource_type_output = resource_type if prefix and member_id.startswith(prefix): From d5fd15ae17dd5844d3a55ec3c2be688386165650 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Mon, 4 Apr 2022 08:59:05 +0200 Subject: [PATCH 14/15] Update README --- examples/data-solutions/data-platform-foundations/README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/examples/data-solutions/data-platform-foundations/README.md b/examples/data-solutions/data-platform-foundations/README.md index 99100f17..09f8e63a 100644 --- a/examples/data-solutions/data-platform-foundations/README.md +++ b/examples/data-solutions/data-platform-foundations/README.md @@ -264,8 +264,3 @@ Features to add in future releases: - Add example on how to use Cloud Data Loss Prevention - Add solution to handle Tables, Views, and Authorized Views lifecycle - Add solution to handle Metadata lifecycle - -## To Test/Fix - -- Composer require "Require OS Login" not enforced -- External Shared-VPC From f1a404a5d4381dfe9c99571083c89845bbfb5135 Mon Sep 17 00:00:00 2001 From: Lorenzo Caggioni Date: Mon, 4 Apr 2022 15:38:36 +0200 Subject: [PATCH 15/15] PR review fixes. 
--- .../data-solutions/data-platform-foundations/03-composer.tf | 2 +- examples/data-solutions/data-platform-foundations/06-common.tf | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/data-solutions/data-platform-foundations/03-composer.tf b/examples/data-solutions/data-platform-foundations/03-composer.tf index 5a107534..fac47ec5 100644 --- a/examples/data-solutions/data-platform-foundations/03-composer.tf +++ b/examples/data-solutions/data-platform-foundations/03-composer.tf @@ -67,7 +67,7 @@ resource "google_composer_environment" "orch-cmp-0" { env_variables = merge( var.composer_config.env_variables, { BQ_LOCATION = var.location - DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog[0].tags), "{}") + DATA_CAT_TAGS = try(jsonencode(module.common-datacatalog.tags), "{}") DF_KMS_KEY = try(var.service_encryption_keys.dataflow, "") DTL_L0_PRJ = module.lake-0-project.project_id DTL_L0_BQ_DATASET = module.lake-0-bq-0.dataset_id diff --git a/examples/data-solutions/data-platform-foundations/06-common.tf b/examples/data-solutions/data-platform-foundations/06-common.tf index b04e2f87..80451500 100644 --- a/examples/data-solutions/data-platform-foundations/06-common.tf +++ b/examples/data-solutions/data-platform-foundations/06-common.tf @@ -60,10 +60,9 @@ module "common-project" { # Data Catalog Policy tag module "common-datacatalog" { - count = var.data_catalog_tags != null ? 1 : 0 source = "../../../modules/data-catalog-policy-tag" - name = "${var.prefix}-datacatalog-policy-tags" project_id = module.common-project.project_id + name = "${var.prefix}-datacatalog-policy-tags" location = var.location tags = var.data_catalog_tags }