Improve Dataplex (#1519)

* First commit.

* Implement fixes.

* fix google_dataplex_zone_iam_binding
lcaggio 2023-07-24 10:52:07 +02:00 committed by GitHub
parent dea6b5ef7c
commit d46312a7f1
6 changed files with 155 additions and 37 deletions

View File

@@ -1,7 +1,6 @@
 # Cloud Dataplex instance with lake, zone & assets
 
 This module manages the creation of a Cloud Dataplex instance along with lakes, zones & assets in a single region.
 
 ## Simple example
@@ -16,27 +15,27 @@ module "dataplex" {
   project_id = "myproject"
   region     = "europe-west2"
   zones = {
-    zone_1 = {
+    landing = {
       type      = "RAW"
       discovery = true
       assets = {
-        asset_1 = {
-          bucket_name            = "asset_1"
+        gcs_1 = {
+          resource_name          = "gcs_bucket"
           cron_schedule          = "15 15 * * *"
           discovery_spec_enabled = true
           resource_spec_type     = "STORAGE_BUCKET"
         }
       }
     },
-    zone_2 = {
+    curated = {
       type      = "CURATED"
-      discovery = true
+      discovery = false
       assets = {
-        asset_2 = {
-          bucket_name            = "asset_2"
-          cron_schedule          = "15 15 * * *"
-          discovery_spec_enabled = true
-          resource_spec_type     = "STORAGE_BUCKET"
+        bq_1 = {
+          resource_name          = "bq_dataset"
+          cron_schedule          = null
+          discovery_spec_enabled = false
+          resource_spec_type     = "BIGQUERY_DATASET"
         }
       }
     }
@@ -45,10 +44,68 @@ module "dataplex" {
 # tftest modules=1 resources=5
 ```
 
+## IAM
+
+This example shows how to set up a Cloud Dataplex instance with lake, zone & asset creation in a GCP project, assigning IAM roles at lake and zone level.
+
+```hcl
+module "dataplex" {
+  source     = "./fabric/modules/cloud-dataplex"
+  name       = "lake"
+  prefix     = "test"
+  project_id = "myproject"
+  region     = "europe-west2"
+  iam = {
+    "roles/dataplex.viewer" = [
+      "group:analysts@example.com",
+      "group:analysts_sensitive@example.com"
+    ]
+  }
+  zones = {
+    landing = {
+      type      = "RAW"
+      discovery = true
+      assets = {
+        gcs_1 = {
+          resource_name          = "gcs_bucket"
+          cron_schedule          = "15 15 * * *"
+          discovery_spec_enabled = true
+          resource_spec_type     = "STORAGE_BUCKET"
+        }
+      }
+    },
+    curated = {
+      type      = "CURATED"
+      discovery = false
+      iam = {
+        "roles/viewer" = [
+          "group:analysts@example.com",
+          "group:analysts_sensitive@example.com"
+        ]
+        "roles/dataplex.dataReader" = [
+          "group:analysts@example.com",
+          "group:analysts_sensitive@example.com"
+        ]
+      }
+      assets = {
+        bq_1 = {
+          resource_name          = "bq_dataset"
+          cron_schedule          = null
+          discovery_spec_enabled = false
+          resource_spec_type     = "BIGQUERY_DATASET"
+        }
+      }
+    }
+  }
+}
+# tftest modules=1 resources=8
+```
+
 ## TODO
-- [ ] Add IAM support
-- [ ] support different type of assets
 - [ ] support multi-regions
 
 <!-- BEGIN TFDOC -->
@@ -56,12 +113,13 @@ module "dataplex" {
 | name | description | type | required | default |
 |---|---|:---:|:---:|:---:|
-| [name](variables.tf#L23) | Name of Dataplex Lake. | <code>string</code> | ✓ | |
-| [prefix](variables.tf#L28) | Optional prefix used to generate Dataplex Lake. | <code>string</code> | ✓ | |
-| [project_id](variables.tf#L33) | The ID of the project where this Dataplex Lake will be created. | <code>string</code> | ✓ | |
-| [region](variables.tf#L38) | Region of the Dataplex Lake. | <code>string</code> | ✓ | |
-| [zones](variables.tf#L43) | Dataplex lake zones, such as `RAW` and `CURATED`. | <code title="map&#40;object&#40;&#123;&#10; type &#61; string&#10; discovery &#61; optional&#40;bool, true&#41;&#10; assets &#61; map&#40;object&#40;&#123;&#10; bucket_name &#61; string&#10; cron_schedule &#61; optional&#40;string, &#34;15 15 &#42; &#42; &#42;&#34;&#41;&#10; discovery_spec_enabled &#61; optional&#40;bool, true&#41;&#10; resource_spec_type &#61; optional&#40;string, &#34;STORAGE_BUCKET&#34;&#41;&#10; &#125;&#41;&#41;&#10;&#125;&#41;&#41;">map&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> | ✓ | |
-| [location_type](variables.tf#L17) | The location type of the Dataplex Lake. | <code>string</code> | | <code>&#34;SINGLE_REGION&#34;</code> |
+| [name](variables.tf#L30) | Name of Dataplex Lake. | <code>string</code> | ✓ | |
+| [project_id](variables.tf#L41) | The ID of the project where this Dataplex Lake will be created. | <code>string</code> | ✓ | |
+| [region](variables.tf#L46) | Region of the Dataplex Lake. | <code>string</code> | ✓ | |
+| [zones](variables.tf#L51) | Dataplex lake zones, such as `RAW` and `CURATED`. | <code title="map&#40;object&#40;&#123;&#10; type &#61; string&#10; discovery &#61; optional&#40;bool, true&#41;&#10; iam &#61; optional&#40;map&#40;list&#40;string&#41;&#41;, null&#41;&#10; assets &#61; map&#40;object&#40;&#123;&#10; resource_name &#61; string&#10; resource_project &#61; optional&#40;string&#41;&#10; cron_schedule &#61; optional&#40;string, &#34;15 15 &#42; &#42; &#42;&#34;&#41;&#10; discovery_spec_enabled &#61; optional&#40;bool, true&#41;&#10; resource_spec_type &#61; optional&#40;string, &#34;STORAGE_BUCKET&#34;&#41;&#10; &#125;&#41;&#41;&#10;&#125;&#41;&#41;">map&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> | ✓ | |
+| [iam](variables.tf#L17) | Dataplex lake IAM bindings in {ROLE => [MEMBERS]} format. | <code>map&#40;list&#40;string&#41;&#41;</code> | | <code>&#123;&#125;</code> |
+| [location_type](variables.tf#L24) | The location type of the Dataplex Lake. | <code>string</code> | | <code>&#34;SINGLE_REGION&#34;</code> |
+| [prefix](variables.tf#L35) | Optional prefix used to generate Dataplex Lake. | <code>string</code> | | <code>null</code> |
 
 ## Outputs
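
As the examples above show, assets are now declared with a `resource_name`/`resource_spec_type` pair instead of `bucket_name`, and the module expands these into a full resource path. A minimal, self-contained sketch of that mapping, using values assumed from the README example (not part of the commit):

```hcl
# Standalone sketch of the asset path mapping, with values assumed from the
# README example above ("myproject", "gcs_bucket", "bq_dataset").
locals {
  resource_type_mapping = {
    "STORAGE_BUCKET"   = "buckets"
    "BIGQUERY_DATASET" = "datasets"
  }
  gcs_asset_name = format("projects/%s/%s/%s",
    "myproject", local.resource_type_mapping["STORAGE_BUCKET"], "gcs_bucket"
  ) # => "projects/myproject/buckets/gcs_bucket"
  bq_asset_name = format("projects/%s/%s/%s",
    "myproject", local.resource_type_mapping["BIGQUERY_DATASET"], "bq_dataset"
  ) # => "projects/myproject/datasets/bq_dataset"
}
```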

View File

@@ -21,28 +21,54 @@ locals {
     for asset, asset_data in zones_info.assets : {
       zone_name              = zone
       asset_name             = asset
-      bucket_name            = asset_data.bucket_name
-      cron_schedule          = asset_data.cron_schedule
+      resource_name          = asset_data.resource_name
+      resource_project       = coalesce(asset_data.resource_project, var.project_id)
+      cron_schedule          = asset_data.discovery_spec_enabled ? asset_data.cron_schedule : null
       discovery_spec_enabled = asset_data.discovery_spec_enabled
       resource_spec_type     = asset_data.resource_spec_type
     }
   ]
 ])
+  zone_iam = flatten([
+    for zone, zone_details in var.zones : [
+      for role, members in zone_details.iam : {
+        "zone"    = zone
+        "role"    = role
+        "members" = members
+      }
+    ] if zone_details.iam != null
+  ])
+  resource_type_mapping = {
+    "STORAGE_BUCKET" : "buckets",
+    "BIGQUERY_DATASET" : "datasets"
+  }
 }
 
-resource "google_dataplex_lake" "basic_lake" {
+resource "google_dataplex_lake" "lake" {
   name     = "${local.prefix}${var.name}"
   location = var.region
   provider = google-beta
   project  = var.project_id
 }
 
-resource "google_dataplex_zone" "basic_zone" {
+resource "google_dataplex_lake_iam_binding" "binding" {
+  for_each = var.iam
+  project  = var.project_id
+  location = var.region
+  lake     = google_dataplex_lake.lake.name
+  role     = each.key
+  members  = each.value
+}
+
+resource "google_dataplex_zone" "zone" {
   for_each = var.zones
+  provider = google-beta
+  project  = var.project_id
   name     = each.key
   location = var.region
-  provider = google-beta
-  lake     = google_dataplex_lake.basic_lake.name
+  lake     = google_dataplex_lake.lake.name
   type     = each.value.type
 
   discovery_spec {
@@ -52,11 +78,21 @@ resource "google_dataplex_zone" "basic_zone" {
   resource_spec {
     location_type = var.location_type
   }
-  project = var.project_id
 }
 
-resource "google_dataplex_asset" "primary" {
+resource "google_dataplex_zone_iam_binding" "binding" {
+  for_each = {
+    for zone_role in local.zone_iam : "${zone_role.zone}-${zone_role.role}" => zone_role
+  }
+  project       = var.project_id
+  location      = var.region
+  lake          = google_dataplex_lake.lake.name
+  dataplex_zone = google_dataplex_zone.zone[each.value.zone].name
+  role          = each.value.role
+  members       = each.value.members
+}
+
+resource "google_dataplex_asset" "asset" {
   for_each = {
     for tm in local.zone_assets : "${tm.zone_name}-${tm.asset_name}" => tm
   }
@@ -64,8 +100,8 @@ resource "google_dataplex_asset" "primary" {
   location = var.region
   provider = google-beta
-  lake          = google_dataplex_lake.basic_lake.name
-  dataplex_zone = google_dataplex_zone.basic_zone[each.value.zone_name].name
+  lake          = google_dataplex_lake.lake.name
+  dataplex_zone = google_dataplex_zone.zone[each.value.zone_name].name
 
   discovery_spec {
     enabled = each.value.discovery_spec_enabled
@@ -73,7 +109,11 @@ resource "google_dataplex_asset" "primary" {
   }
 
   resource_spec {
-    name = "projects/${var.project_id}/buckets/${each.value.bucket_name}"
+    name = format("projects/%s/%s/%s",
+      each.value.resource_project,
+      local.resource_type_mapping[each.value.resource_spec_type],
+      each.value.resource_name
+    )
     type = each.value.resource_spec_type
   }
   project = var.project_id
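
The `zone_iam` local above drives the `for_each` on `google_dataplex_zone_iam_binding`. A minimal, runnable sketch of the same flattening, with inputs assumed from the README's IAM example (not part of the commit):

```hcl
# Minimal sketch (inputs assumed from the README IAM example) of the zone_iam
# flattening that feeds google_dataplex_zone_iam_binding's for_each.
locals {
  zones_demo = {
    landing = { iam = null }
    curated = {
      iam = {
        "roles/viewer"              = ["group:analysts@example.com"]
        "roles/dataplex.dataReader" = ["group:analysts@example.com"]
      }
    }
  }
  zone_iam_demo = flatten([
    for zone, zone_details in local.zones_demo : [
      for role, members in zone_details.iam : {
        zone    = zone
        role    = role
        members = members
      }
    ] if zone_details.iam != null
  ])
  # for_each keys: ["curated-roles/dataplex.dataReader", "curated-roles/viewer"]
  binding_keys_demo = [for zr in local.zone_iam_demo : "${zr.zone}-${zr.role}"]
}
```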

View File

@@ -16,21 +16,21 @@
 output "assets" {
   description = "Assets attached to the Dataplex Lake."
-  value       = local.zone_assets[*]["asset_name"]
+  value       = local.zone_assets[*]
 }
 
 output "id" {
   description = "Fully qualified Dataplex Lake id."
-  value       = google_dataplex_lake.basic_lake.id
+  value       = google_dataplex_lake.lake.id
 }
 
 output "lake" {
   description = "The lake name of the Dataplex Lake."
-  value       = google_dataplex_lake.basic_lake.name
+  value       = google_dataplex_lake.lake.name
 }
 
 output "zones" {
   description = "The zone names of the Dataplex Lake."
-  value       = local.zone_assets[*]["zone_name"]
+  value       = distinct(local.zone_assets[*]["zone_name"])
 }
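
The `zones` output now wraps the splat in `distinct()` because `zone_assets` has one element per asset, so a zone with several assets would otherwise appear once per asset. A small sketch with assumed data (not from the commit):

```hcl
# Sketch with assumed data: zone_assets has one entry per asset, so the
# zone name repeats; distinct() collapses the duplicates.
locals {
  zone_assets_demo = [
    { zone_name = "landing", asset_name = "gcs_1" },
    { zone_name = "landing", asset_name = "gcs_2" },
  ]
  # without distinct: ["landing", "landing"]
  zones_demo = distinct(local.zone_assets_demo[*]["zone_name"]) # ["landing"]
}
```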

View File

@@ -14,6 +14,13 @@
  * limitations under the License.
  */
 
+variable "iam" {
+  description = "Dataplex lake IAM bindings in {ROLE => [MEMBERS]} format."
+  type        = map(list(string))
+  default     = {}
+  nullable    = false
+}
+
 variable "location_type" {
   description = "The location type of the Dataplex Lake."
   type        = string
@@ -28,6 +35,7 @@ variable "name" {
 variable "prefix" {
   description = "Optional prefix used to generate Dataplex Lake."
   type        = string
+  default     = null
 }
 
 variable "project_id" {
@@ -45,11 +53,21 @@ variable "zones" {
   type = map(object({
     type      = string
     discovery = optional(bool, true)
+    iam       = optional(map(list(string)), null)
     assets = map(object({
-      bucket_name            = string
+      resource_name          = string
+      resource_project       = optional(string)
       cron_schedule          = optional(string, "15 15 * * *")
       discovery_spec_enabled = optional(bool, true)
       resource_spec_type     = optional(string, "STORAGE_BUCKET")
     }))
   }))
+  validation {
+    condition = alltrue(flatten([
+      for k, v in var.zones : [
+        for kk, vv in v.assets : contains(["BIGQUERY_DATASET", "STORAGE_BUCKET"], vv.resource_spec_type)
+      ]
+    ]))
+    error_message = "Asset spec type must be one of 'BIGQUERY_DATASET' or 'STORAGE_BUCKET'."
+  }
 }
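
The new `validation` block rejects unsupported asset types at plan time. A standalone sketch of the same pattern, with a hypothetical variable name and a deliberately invalid default (not part of the commit); `terraform plan` fails here with the error message shown:

```hcl
# Standalone sketch (hypothetical variable name and default) of the validation
# pattern added above: unsupported asset types fail at plan time.
variable "zones_demo" {
  type = map(object({
    assets = map(object({
      resource_spec_type = string
    }))
  }))
  default = {
    landing = {
      assets = {
        bad = { resource_spec_type = "PUBSUB_TOPIC" } # not allowed
      }
    }
  }
  validation {
    condition = alltrue(flatten([
      for k, v in var.zones_demo : [
        for kk, vv in v.assets :
        contains(["BIGQUERY_DATASET", "STORAGE_BUCKET"], vv.resource_spec_type)
      ]
    ]))
    error_message = "Asset spec type must be one of 'BIGQUERY_DATASET' or 'STORAGE_BUCKET'."
  }
}
```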

View File

@@ -209,6 +209,7 @@ This table lists all affected services and roles that you need to grant to service identities
 | artifactregistry.googleapis.com | artifactregistry | roles/artifactregistry.serviceAgent |
 | cloudasset.googleapis.com | cloudasset | roles/cloudasset.serviceAgent |
 | cloudbuild.googleapis.com | cloudbuild | roles/cloudbuild.builds.builder |
+| dataplex.googleapis.com | dataplex | roles/dataplex.serviceAgent |
 | gkehub.googleapis.com | fleet | roles/gkehub.serviceAgent |
 | meshconfig.googleapis.com | servicemesh | roles/anthosservicemesh.serviceAgent |
 | multiclusteringress.googleapis.com | multicluster-ingress | roles/multiclusteringress.serviceAgent |

View File

@@ -155,6 +155,7 @@
   service_agent: "service-%s@gcp-sa-datapipelines.iam.gserviceaccount.com"
 - name: "dataplex"
   service_agent: "service-%s@gcp-sa-dataplex.iam.gserviceaccount.com"
+  jit: true # roles/dataplex.serviceAgent
 - name: "dataproc"
   service_agent: "service-%s@dataproc-accounts.iam.gserviceaccount.com"
 - name: "datastream"