Improve Dataplex (#1519)

* First commit.

* Implement fixes.

* fix google_dataplex_zone_iam_binding
This commit is contained in:
lcaggio 2023-07-24 10:52:07 +02:00 committed by GitHub
parent dea6b5ef7c
commit d46312a7f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 155 additions and 37 deletions

View File

@ -1,7 +1,6 @@
# Cloud Dataplex instance with lake, zone & assets # Cloud Dataplex instance with lake, zone & assets
This module manages the creation of Cloud Dataplex instance along with lake, zone & assets in single regions. This module manages the creation of Cloud Dataplex instance along with lake, zone & assets in single regions.
## Simple example ## Simple example
@ -16,27 +15,27 @@ module "dataplex" {
project_id = "myproject" project_id = "myproject"
region = "europe-west2" region = "europe-west2"
zones = { zones = {
zone_1 = { landing = {
type = "RAW" type = "RAW"
discovery = true discovery = true
assets = { assets = {
asset_1 = { gcs_1 = {
bucket_name = "asset_1" resource_name = "gcs_bucket"
cron_schedule = "15 15 * * *" cron_schedule = "15 15 * * *"
discovery_spec_enabled = true discovery_spec_enabled = true
resource_spec_type = "STORAGE_BUCKET" resource_spec_type = "STORAGE_BUCKET"
} }
} }
}, },
zone_2 = { curated = {
type = "CURATED" type = "CURATED"
discovery = true discovery = false
assets = { assets = {
asset_2 = { bq_1 = {
bucket_name = "asset_2" resource_name = "bq_dataset"
cron_schedule = "15 15 * * *" cron_schedule = null
discovery_spec_enabled = true discovery_spec_enabled = false
resource_spec_type = "STORAGE_BUCKET" resource_spec_type = "BIGQUERY_DATASET"
} }
} }
} }
@ -45,10 +44,68 @@ module "dataplex" {
# tftest modules=1 resources=5 # tftest modules=1 resources=5
``` ```
## IAM
This example shows how to set up a Cloud Dataplex instance, lake, zone & asset creation in a GCP project, assigning IAM roles at lake and zone level.
```hcl
module "dataplex" {
source = "./fabric/modules/cloud-dataplex"
name = "lake"
prefix = "test"
project_id = "myproject"
region = "europe-west2"
iam = {
"roles/dataplex.viewer" = [
"group:analysts@example.com",
"group:analysts_sensitive@example.com"
]
}
zones = {
landing = {
type = "RAW"
discovery = true
assets = {
gcs_1 = {
resource_name = "gcs_bucket"
cron_schedule = "15 15 * * *"
discovery_spec_enabled = true
resource_spec_type = "STORAGE_BUCKET"
}
}
},
curated = {
type = "CURATED"
discovery = false
iam = {
"roles/viewer" = [
"group:analysts@example.com",
"group:analysts_sensitive@example.com"
]
"roles/dataplex.dataReader" = [
"group:analysts@example.com",
"group:analysts_sensitive@example.com"
]
}
assets = {
bq_1 = {
resource_name = "bq_dataset"
cron_schedule = null
discovery_spec_enabled = false
resource_spec_type = "BIGQUERY_DATASET"
}
}
}
}
}
# tftest modules=1 resources=8
```
## TODO ## TODO
- [ ] Add IAM support
- [ ] support different type of assets
- [ ] support multi-regions - [ ] support multi-regions
<!-- BEGIN TFDOC --> <!-- BEGIN TFDOC -->
@ -56,12 +113,13 @@ module "dataplex" {
| name | description | type | required | default | | name | description | type | required | default |
|---|---|:---:|:---:|:---:| |---|---|:---:|:---:|:---:|
| [name](variables.tf#L23) | Name of Dataplex Lake. | <code>string</code> | ✓ | | | [name](variables.tf#L30) | Name of Dataplex Lake. | <code>string</code> | ✓ | |
| [prefix](variables.tf#L28) | Optional prefix used to generate Dataplex Lake. | <code>string</code> | ✓ | | | [project_id](variables.tf#L41) | The ID of the project where this Dataplex Lake will be created. | <code>string</code> | ✓ | |
| [project_id](variables.tf#L33) | The ID of the project where this Dataplex Lake will be created. | <code>string</code> | ✓ | | | [region](variables.tf#L46) | Region of the Dataplex Lake. | <code>string</code> | ✓ | |
| [region](variables.tf#L38) | Region of the Dataplex Lake. | <code>string</code> | ✓ | | | [zones](variables.tf#L51) | Dataplex lake zones, such as `RAW` and `CURATED`. | <code title="map&#40;object&#40;&#123;&#10; type &#61; string&#10; discovery &#61; optional&#40;bool, true&#41;&#10; iam &#61; optional&#40;map&#40;list&#40;string&#41;&#41;, null&#41;&#10; assets &#61; map&#40;object&#40;&#123;&#10; resource_name &#61; string&#10; resource_project &#61; optional&#40;string&#41;&#10; cron_schedule &#61; optional&#40;string, &#34;15 15 &#42; &#42; &#42;&#34;&#41;&#10; discovery_spec_enabled &#61; optional&#40;bool, true&#41;&#10; resource_spec_type &#61; optional&#40;string, &#34;STORAGE_BUCKET&#34;&#41;&#10; &#125;&#41;&#41;&#10;&#125;&#41;&#41;">map&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> | ✓ | |
| [zones](variables.tf#L43) | Dataplex lake zones, such as `RAW` and `CURATED`. | <code title="map&#40;object&#40;&#123;&#10; type &#61; string&#10; discovery &#61; optional&#40;bool, true&#41;&#10; assets &#61; map&#40;object&#40;&#123;&#10; bucket_name &#61; string&#10; cron_schedule &#61; optional&#40;string, &#34;15 15 &#42; &#42; &#42;&#34;&#41;&#10; discovery_spec_enabled &#61; optional&#40;bool, true&#41;&#10; resource_spec_type &#61; optional&#40;string, &#34;STORAGE_BUCKET&#34;&#41;&#10; &#125;&#41;&#41;&#10;&#125;&#41;&#41;">map&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> | ✓ | | | [iam](variables.tf#L17) | Dataplex lake IAM bindings in {ROLE => [MEMBERS]} format. | <code>map&#40;list&#40;string&#41;&#41;</code> | | <code>&#123;&#125;</code> |
| [location_type](variables.tf#L17) | The location type of the Dataplex Lake. | <code>string</code> | | <code>&#34;SINGLE_REGION&#34;</code> | | [location_type](variables.tf#L24) | The location type of the Dataplex Lake. | <code>string</code> | | <code>&#34;SINGLE_REGION&#34;</code> |
| [prefix](variables.tf#L35) | Optional prefix used to generate Dataplex Lake. | <code>string</code> | | <code>null</code> |
## Outputs ## Outputs

View File

@ -21,28 +21,54 @@ locals {
for asset, asset_data in zones_info.assets : { for asset, asset_data in zones_info.assets : {
zone_name = zone zone_name = zone
asset_name = asset asset_name = asset
bucket_name = asset_data.bucket_name resource_name = asset_data.resource_name
cron_schedule = asset_data.cron_schedule resource_project = coalesce(asset_data.resource_project, var.project_id)
cron_schedule = asset_data.discovery_spec_enabled ? asset_data.cron_schedule : null
discovery_spec_enabled = asset_data.discovery_spec_enabled discovery_spec_enabled = asset_data.discovery_spec_enabled
resource_spec_type = asset_data.resource_spec_type resource_spec_type = asset_data.resource_spec_type
} }
] ]
]) ])
zone_iam = flatten([
for zone, zone_details in var.zones : [
for role, members in zone_details.iam : {
"zone" = zone
"role" = role
"members" = members
}
] if zone_details.iam != null
])
resource_type_mapping = {
"STORAGE_BUCKET" : "buckets",
"BIGQUERY_DATASET" : "datasets"
}
} }
resource "google_dataplex_lake" "basic_lake" { resource "google_dataplex_lake" "lake" {
name = "${local.prefix}${var.name}" name = "${local.prefix}${var.name}"
location = var.region location = var.region
provider = google-beta provider = google-beta
project = var.project_id project = var.project_id
} }
resource "google_dataplex_zone" "basic_zone" { resource "google_dataplex_lake_iam_binding" "binding" {
for_each = var.iam
project = var.project_id
location = var.region
lake = google_dataplex_lake.lake.name
role = each.key
members = each.value
}
resource "google_dataplex_zone" "zone" {
for_each = var.zones for_each = var.zones
provider = google-beta
project = var.project_id
name = each.key name = each.key
location = var.region location = var.region
provider = google-beta lake = google_dataplex_lake.lake.name
lake = google_dataplex_lake.basic_lake.name
type = each.value.type type = each.value.type
discovery_spec { discovery_spec {
@ -52,11 +78,21 @@ resource "google_dataplex_zone" "basic_zone" {
resource_spec { resource_spec {
location_type = var.location_type location_type = var.location_type
} }
project = var.project_id
} }
resource "google_dataplex_asset" "primary" { resource "google_dataplex_zone_iam_binding" "binding" {
for_each = {
for zone_role in local.zone_iam : "${zone_role.zone}-${zone_role.role}" => zone_role
}
project = var.project_id
location = var.region
lake = google_dataplex_lake.lake.name
dataplex_zone = google_dataplex_zone.zone[each.value.zone].name
role = each.value.role
members = each.value.members
}
resource "google_dataplex_asset" "asset" {
for_each = { for_each = {
for tm in local.zone_assets : "${tm.zone_name}-${tm.asset_name}" => tm for tm in local.zone_assets : "${tm.zone_name}-${tm.asset_name}" => tm
} }
@ -64,8 +100,8 @@ resource "google_dataplex_asset" "primary" {
location = var.region location = var.region
provider = google-beta provider = google-beta
lake = google_dataplex_lake.basic_lake.name lake = google_dataplex_lake.lake.name
dataplex_zone = google_dataplex_zone.basic_zone[each.value.zone_name].name dataplex_zone = google_dataplex_zone.zone[each.value.zone_name].name
discovery_spec { discovery_spec {
enabled = each.value.discovery_spec_enabled enabled = each.value.discovery_spec_enabled
@ -73,7 +109,11 @@ resource "google_dataplex_asset" "primary" {
} }
resource_spec { resource_spec {
name = "projects/${var.project_id}/buckets/${each.value.bucket_name}" name = format("projects/%s/%s/%s",
each.value.resource_project,
local.resource_type_mapping[each.value.resource_spec_type],
each.value.resource_name
)
type = each.value.resource_spec_type type = each.value.resource_spec_type
} }
project = var.project_id project = var.project_id

View File

@ -16,21 +16,21 @@
output "assets" { output "assets" {
description = "Assets attached to the lake of Dataplex Lake." description = "Assets attached to the lake of Dataplex Lake."
value = local.zone_assets[*]["asset_name"] value = local.zone_assets[*]
} }
output "id" { output "id" {
description = "Fully qualified Dataplex Lake id." description = "Fully qualified Dataplex Lake id."
value = google_dataplex_lake.basic_lake.id value = google_dataplex_lake.lake.id
} }
output "lake" { output "lake" {
description = "The lake name of Dataplex Lake." description = "The lake name of Dataplex Lake."
value = google_dataplex_lake.basic_lake.name value = google_dataplex_lake.lake.name
} }
output "zones" { output "zones" {
description = "The zone name of Dataplex Lake." description = "The zone name of Dataplex Lake."
value = local.zone_assets[*]["zone_name"] value = distinct(local.zone_assets[*]["zone_name"])
} }

View File

@ -14,6 +14,13 @@
* limitations under the License. * limitations under the License.
*/ */
variable "iam" {
description = "Dataplex lake IAM bindings in {ROLE => [MEMBERS]} format."
type = map(list(string))
default = {}
nullable = false
}
variable "location_type" { variable "location_type" {
description = "The location type of the Dataplex Lake." description = "The location type of the Dataplex Lake."
type = string type = string
@ -28,6 +35,7 @@ variable "name" {
variable "prefix" { variable "prefix" {
description = "Optional prefix used to generate Dataplex Lake." description = "Optional prefix used to generate Dataplex Lake."
type = string type = string
default = null
} }
variable "project_id" { variable "project_id" {
@ -45,11 +53,21 @@ variable "zones" {
type = map(object({ type = map(object({
type = string type = string
discovery = optional(bool, true) discovery = optional(bool, true)
iam = optional(map(list(string)), null)
assets = map(object({ assets = map(object({
bucket_name = string resource_name = string
resource_project = optional(string)
cron_schedule = optional(string, "15 15 * * *") cron_schedule = optional(string, "15 15 * * *")
discovery_spec_enabled = optional(bool, true) discovery_spec_enabled = optional(bool, true)
resource_spec_type = optional(string, "STORAGE_BUCKET") resource_spec_type = optional(string, "STORAGE_BUCKET")
})) }))
})) }))
validation {
condition = alltrue(flatten([
for k, v in var.zones : [
for kk, vv in v.assets : contains(["BIGQUERY_DATASET", "STORAGE_BUCKET"], vv.resource_spec_type)
]
]))
error_message = "Asset spec type must be one of 'BIGQUERY_DATASET' or 'STORAGE_BUCKET'."
}
} }

View File

@ -209,6 +209,7 @@ This table lists all affected services and roles that you need to grant to servi
| artifactregistry.googleapis.com | artifactregistry | roles/artifactregistry.serviceAgent | | artifactregistry.googleapis.com | artifactregistry | roles/artifactregistry.serviceAgent |
| cloudasset.googleapis.com | cloudasset | roles/cloudasset.serviceAgent | | cloudasset.googleapis.com | cloudasset | roles/cloudasset.serviceAgent |
| cloudbuild.googleapis.com | cloudbuild | roles/cloudbuild.builds.builder | | cloudbuild.googleapis.com | cloudbuild | roles/cloudbuild.builds.builder |
| dataplex.googleapis.com | dataplex | roles/dataplex.serviceAgent |
| gkehub.googleapis.com | fleet | roles/gkehub.serviceAgent | | gkehub.googleapis.com | fleet | roles/gkehub.serviceAgent |
| meshconfig.googleapis.com | servicemesh | roles/anthosservicemesh.serviceAgent | | meshconfig.googleapis.com | servicemesh | roles/anthosservicemesh.serviceAgent |
| multiclusteringress.googleapis.com | multicluster-ingress | roles/multiclusteringress.serviceAgent | | multiclusteringress.googleapis.com | multicluster-ingress | roles/multiclusteringress.serviceAgent |

View File

@ -155,6 +155,7 @@
service_agent: "service-%s@gcp-sa-datapipelines.iam.gserviceaccount.com" service_agent: "service-%s@gcp-sa-datapipelines.iam.gserviceaccount.com"
- name: "dataplex" - name: "dataplex"
service_agent: "service-%s@gcp-sa-dataplex.iam.gserviceaccount.com" service_agent: "service-%s@gcp-sa-dataplex.iam.gserviceaccount.com"
jit: true # roles/dataplex.serviceAgent
- name: "dataproc" - name: "dataproc"
service_agent: "service-%s@dataproc-accounts.iam.gserviceaccount.com" service_agent: "service-%s@dataproc-accounts.iam.gserviceaccount.com"
- name: "datastream" - name: "datastream"