From d46312a7f1f5ba533ea9791efdcdc62790203bc5 Mon Sep 17 00:00:00 2001 From: lcaggio Date: Mon, 24 Jul 2023 10:52:07 +0200 Subject: [PATCH] Improve Dataplex (#1519) * First commit. * Implement fixes. * fix google_dataplex_zone_iam_binding --- modules/cloud-dataplex/README.md | 98 +++++++++++++++++++++++------ modules/cloud-dataplex/main.tf | 64 +++++++++++++++---- modules/cloud-dataplex/outputs.tf | 8 +-- modules/cloud-dataplex/variables.tf | 20 +++++- modules/project/README.md | 1 + modules/project/service-agents.yaml | 1 + 6 files changed, 155 insertions(+), 37 deletions(-) diff --git a/modules/cloud-dataplex/README.md b/modules/cloud-dataplex/README.md index 6849db84..c3c02a7c 100644 --- a/modules/cloud-dataplex/README.md +++ b/modules/cloud-dataplex/README.md @@ -1,7 +1,6 @@ # Cloud Dataplex instance with lake, zone & assests -This module manages the creation of Cloud Dataplex instance along with lake, zone & assets in single regions. - +This module manages the creation of Cloud Dataplex instance along with lake, zone & assets in single regions. 
## Simple example @@ -16,27 +15,27 @@ module "dataplex" { project_id = "myproject" region = "europe-west2" zones = { - zone_1 = { + landing = { type = "RAW" discovery = true assets = { - asset_1 = { - bucket_name = "asset_1" + gcs_1 = { + resource_name = "gcs_bucket" cron_schedule = "15 15 * * *" discovery_spec_enabled = true resource_spec_type = "STORAGE_BUCKET" } } }, - zone_2 = { + curated = { type = "CURATED" - discovery = true + discovery = false assets = { - asset_2 = { - bucket_name = "asset_2" - cron_schedule = "15 15 * * *" - discovery_spec_enabled = true - resource_spec_type = "STORAGE_BUCKET" + bq_1 = { + resource_name = "bq_dataset" + cron_schedule = null + discovery_spec_enabled = false + resource_spec_type = "BIGQUERY_DATASET" } } } @@ -45,10 +44,68 @@ module "dataplex" { # tftest modules=1 resources=5 ``` + +## IAM + +This example shows how to setup a Cloud Dataplex instance, lake, zone & asset creation in GCP project assigning IAM roles at lake and zone level. + +```hcl + +module "dataplex" { + source = "./fabric/modules/cloud-dataplex" + name = "lake" + prefix = "test" + project_id = "myproject" + region = "europe-west2" + iam = { + "roles/dataplex.viewer" = [ + "group:analysts@example.com", + "group:analysts_sensitive@example.com" + ] + } + zones = { + landing = { + type = "RAW" + discovery = true + assets = { + gcs_1 = { + resource_name = "gcs_bucket" + cron_schedule = "15 15 * * *" + discovery_spec_enabled = true + resource_spec_type = "STORAGE_BUCKET" + } + } + }, + curated = { + type = "CURATED" + discovery = false + iam = { + "roles/viewer" = [ + "group:analysts@example.com", + "group:analysts_sensitive@example.com" + ] + "roles/dataplex.dataReader" = [ + "group:analysts@example.com", + "group:analysts_sensitive@example.com" + ] + } + assets = { + bq_1 = { + resource_name = "bq_dataset" + cron_schedule = null + discovery_spec_enabled = false + resource_spec_type = "BIGQUERY_DATASET" + } + } + } + } +} + +# tftest modules=1 resources=8 +``` + 
## TODO -- [ ] Add IAM support -- [ ] support different type of assets - [ ] support multi-regions @@ -56,12 +113,13 @@ module "dataplex" { | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| [name](variables.tf#L23) | Name of Dataplex Lake. | string | ✓ | | -| [prefix](variables.tf#L28) | Optional prefix used to generate Dataplex Lake. | string | ✓ | | -| [project_id](variables.tf#L33) | The ID of the project where this Dataplex Lake will be created. | string | ✓ | | -| [region](variables.tf#L38) | Region of the Dataplax Lake. | string | ✓ | | -| [zones](variables.tf#L43) | Dataplex lake zones, such as `RAW` and `CURATED`. | map(object({…})) | ✓ | | -| [location_type](variables.tf#L17) | The location type of the Dataplax Lake. | string | | "SINGLE_REGION" | +| [name](variables.tf#L30) | Name of Dataplex Lake. | string | ✓ | | +| [project_id](variables.tf#L41) | The ID of the project where this Dataplex Lake will be created. | string | ✓ | | +| [region](variables.tf#L46) | Region of the Dataplax Lake. | string | ✓ | | +| [zones](variables.tf#L51) | Dataplex lake zones, such as `RAW` and `CURATED`. | map(object({…})) | ✓ | | +| [iam](variables.tf#L17) | Dataplex lake IAM bindings in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | +| [location_type](variables.tf#L24) | The location type of the Dataplax Lake. | string | | "SINGLE_REGION" | +| [prefix](variables.tf#L35) | Optional prefix used to generate Dataplex Lake. 
| string | | null | ## Outputs diff --git a/modules/cloud-dataplex/main.tf b/modules/cloud-dataplex/main.tf index af5ef018..b78ca548 100644 --- a/modules/cloud-dataplex/main.tf +++ b/modules/cloud-dataplex/main.tf @@ -21,28 +21,54 @@ locals { for asset, asset_data in zones_info.assets : { zone_name = zone asset_name = asset - bucket_name = asset_data.bucket_name - cron_schedule = asset_data.cron_schedule + resource_name = asset_data.resource_name + resource_project = coalesce(asset_data.resource_project, var.project_id) + cron_schedule = asset_data.discovery_spec_enabled ? asset_data.cron_schedule : null discovery_spec_enabled = asset_data.discovery_spec_enabled resource_spec_type = asset_data.resource_spec_type } ] ]) + + zone_iam = flatten([ + for zone, zone_details in var.zones : [ + for role, members in zone_details.iam : { + "zone" = zone + "role" = role + "members" = members + } + ] if zone_details.iam != null + ]) + + resource_type_mapping = { + "STORAGE_BUCKET" : "buckets", + "BIGQUERY_DATASET" : "datasets" + } } -resource "google_dataplex_lake" "basic_lake" { +resource "google_dataplex_lake" "lake" { name = "${local.prefix}${var.name}" location = var.region provider = google-beta project = var.project_id } -resource "google_dataplex_zone" "basic_zone" { +resource "google_dataplex_lake_iam_binding" "binding" { + for_each = var.iam + project = var.project_id + location = var.region + lake = google_dataplex_lake.lake.name + role = each.key + members = each.value +} + +resource "google_dataplex_zone" "zone" { for_each = var.zones + provider = google-beta + project = var.project_id name = each.key location = var.region - provider = google-beta - lake = google_dataplex_lake.basic_lake.name + lake = google_dataplex_lake.lake.name type = each.value.type discovery_spec { @@ -52,11 +78,21 @@ resource "google_dataplex_zone" "basic_zone" { resource_spec { location_type = var.location_type } - - project = var.project_id } -resource "google_dataplex_asset" "primary" { 
+resource "google_dataplex_zone_iam_binding" "binding" { + for_each = { + for zone_role in local.zone_iam : "${zone_role.zone}-${zone_role.role}" => zone_role + } + project = var.project_id + location = var.region + lake = google_dataplex_lake.lake.name + dataplex_zone = google_dataplex_zone.zone[each.value.zone].name + role = each.value.role + members = each.value.members +} + +resource "google_dataplex_asset" "asset" { for_each = { for tm in local.zone_assets : "${tm.zone_name}-${tm.asset_name}" => tm } @@ -64,8 +100,8 @@ resource "google_dataplex_asset" "primary" { location = var.region provider = google-beta - lake = google_dataplex_lake.basic_lake.name - dataplex_zone = google_dataplex_zone.basic_zone[each.value.zone_name].name + lake = google_dataplex_lake.lake.name + dataplex_zone = google_dataplex_zone.zone[each.value.zone_name].name discovery_spec { enabled = each.value.discovery_spec_enabled @@ -73,7 +109,11 @@ resource "google_dataplex_asset" "primary" { } resource_spec { - name = "projects/${var.project_id}/buckets/${each.value.bucket_name}" + name = format("projects/%s/%s/%s", + each.value.resource_project, + local.resource_type_mapping[each.value.resource_spec_type], + each.value.resource_name + ) type = each.value.resource_spec_type } project = var.project_id diff --git a/modules/cloud-dataplex/outputs.tf b/modules/cloud-dataplex/outputs.tf index 7a68ff28..0da4fcc2 100644 --- a/modules/cloud-dataplex/outputs.tf +++ b/modules/cloud-dataplex/outputs.tf @@ -16,21 +16,21 @@ output "assets" { description = "Assets attached to the lake of Dataplex Lake." - value = local.zone_assets[*]["asset_name"] + value = local.zone_assets[*] } output "id" { description = "Fully qualified Dataplex Lake id." - value = google_dataplex_lake.basic_lake.id + value = google_dataplex_lake.lake.id } output "lake" { description = "The lake name of Dataplex Lake." 
- value = google_dataplex_lake.basic_lake.name + value = google_dataplex_lake.lake.name } output "zones" { description = "The zone name of Dataplex Lake." - value = local.zone_assets[*]["zone_name"] + value = distinct(local.zone_assets[*]["zone_name"]) } diff --git a/modules/cloud-dataplex/variables.tf b/modules/cloud-dataplex/variables.tf index bbbba1c7..fa4e6521 100644 --- a/modules/cloud-dataplex/variables.tf +++ b/modules/cloud-dataplex/variables.tf @@ -14,6 +14,13 @@ * limitations under the License. */ +variable "iam" { + description = "Dataplex lake IAM bindings in {ROLE => [MEMBERS]} format." + type = map(list(string)) + default = {} + nullable = false +} + variable "location_type" { description = "The location type of the Dataplax Lake." type = string @@ -28,6 +35,7 @@ variable "name" { variable "prefix" { description = "Optional prefix used to generate Dataplex Lake." type = string + default = null } variable "project_id" { @@ -45,11 +53,21 @@ variable "zones" { type = map(object({ type = string discovery = optional(bool, true) + iam = optional(map(list(string)), null) assets = map(object({ - bucket_name = string + resource_name = string + resource_project = optional(string) cron_schedule = optional(string, "15 15 * * *") discovery_spec_enabled = optional(bool, true) resource_spec_type = optional(string, "STORAGE_BUCKET") })) })) + validation { + condition = alltrue(flatten([ + for k, v in var.zones : [ + for kk, vv in v.assets : contains(["BIGQUERY_DATASET", "STORAGE_BUCKET"], vv.resource_spec_type) + ] + ])) + error_message = "Asset spec type must be one of 'BIGQUERY_DATASET' or 'STORAGE_BUCKET'." 
+ } } diff --git a/modules/project/README.md b/modules/project/README.md index d44aca57..127f215f 100644 --- a/modules/project/README.md +++ b/modules/project/README.md @@ -209,6 +209,7 @@ This table lists all affected services and roles that you need to grant to servi | artifactregistry.googleapis.com | artifactregistry | roles/artifactregistry.serviceAgent | | cloudasset.googleapis.com | cloudasset | roles/cloudasset.serviceAgent | | cloudbuild.googleapis.com | cloudbuild | roles/cloudbuild.builds.builder | +| dataplex.googleapis.com | dataplex | roles/dataplex.serviceAgent | | gkehub.googleapis.com | fleet | roles/gkehub.serviceAgent | | meshconfig.googleapis.com | servicemesh | roles/anthosservicemesh.serviceAgent | | multiclusteringress.googleapis.com | multicluster-ingress | roles/multiclusteringress.serviceAgent | diff --git a/modules/project/service-agents.yaml b/modules/project/service-agents.yaml index 856c650b..4ef3cafd 100644 --- a/modules/project/service-agents.yaml +++ b/modules/project/service-agents.yaml @@ -155,6 +155,7 @@ service_agent: "service-%s@gcp-sa-datapipelines.iam.gserviceaccount.com" - name: "dataplex" service_agent: "service-%s@gcp-sa-dataplex.iam.gserviceaccount.com" + jit: true # roles/dataplex.serviceAgent - name: "dataproc" service_agent: "service-%s@dataproc-accounts.iam.gserviceaccount.com" - name: "datastream"