add dataplex datascan base module (#1545)

This commit is contained in:
Thinh Ha 2023-08-02 12:16:32 +01:00 committed by GitHub
parent 1b17786634
commit 43e73aba9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 1686 additions and 1 deletions

View File

@ -32,7 +32,7 @@ Currently available modules:
- **foundational** - [billing budget](./modules/billing-budget), [Cloud Identity group](./modules/cloud-identity-group/), [folder](./modules/folder), [service accounts](./modules/iam-service-account), [logging bucket](./modules/logging-bucket), [organization](./modules/organization), [project](./modules/project), [projects-data-source](./modules/projects-data-source)
- **networking** - [DNS](./modules/dns), [DNS Response Policy](./modules/dns-response-policy/), [Cloud Endpoints](./modules/endpoints), [address reservation](./modules/net-address), [NAT](./modules/net-cloudnat), [VLAN Attachment](./modules/net-vlan-attachment/), [External Application LB](./modules/net-lb-app-ext/), [External Passthrough Network LB](./modules/net-lb-ext), [Internal Application LB](./modules/net-lb-app-int), [Internal Passthrough Network LB](./modules/net-lb-int), [Internal Proxy Network LB](./modules/net-lb-proxy-int), [IPSec over Interconnect](./modules/net-ipsec-over-interconnect), [VPC](./modules/net-vpc), [VPC firewall](./modules/net-vpc-firewall), [VPC firewall policy](./modules/net-vpc-firewall-policy), [VPC peering](./modules/net-vpc-peering), [VPN dynamic](./modules/net-vpn-dynamic), [HA VPN](./modules/net-vpn-ha), [VPN static](./modules/net-vpn-static), [Service Directory](./modules/service-directory), [Secure Web Proxy](./modules/net-swp)
- **compute** - [VM/VM group](./modules/compute-vm), [MIG](./modules/compute-mig), [COS container](./modules/cloud-config-container/cos-generic-metadata/) (coredns, mysql, onprem, squid), [GKE cluster](./modules/gke-cluster-standard), [GKE hub](./modules/gke-hub), [GKE nodepool](./modules/gke-nodepool)
- **data** - [AlloyDB instance](./modules/alloydb-instance), [BigQuery dataset](./modules/bigquery-dataset), [Bigtable instance](./modules/bigtable-instance), [Dataplex](./modules/dataplex), [Cloud SQL instance](./modules/cloudsql-instance), [Data Catalog Policy Tag](./modules/data-catalog-policy-tag), [Datafusion](./modules/datafusion), [Dataproc](./modules/dataproc), [GCS](./modules/gcs), [Pub/Sub](./modules/pubsub)
- **data** - [AlloyDB instance](./modules/alloydb-instance), [BigQuery dataset](./modules/bigquery-dataset), [Bigtable instance](./modules/bigtable-instance), [Dataplex](./modules/dataplex), [Dataplex DataScan](./modules/dataplex-datascan/), [Cloud SQL instance](./modules/cloudsql-instance), [Data Catalog Policy Tag](./modules/data-catalog-policy-tag), [Datafusion](./modules/datafusion), [Dataproc](./modules/dataproc), [GCS](./modules/gcs), [Pub/Sub](./modules/pubsub)
- **development** - [API Gateway](./modules/api-gateway), [Apigee](./modules/apigee), [Artifact Registry](./modules/artifact-registry), [Container Registry](./modules/container-registry), [Cloud Source Repository](./modules/source-repository)
- **security** - [Binauthz](./modules/binauthz/), [KMS](./modules/kms), [SecretManager](./modules/secret-manager), [VPC Service Control](./modules/vpc-sc)
- **serverless** - [Cloud Function v1](./modules/cloud-function-v1), [Cloud Function v2](./modules/cloud-function-v2), [Cloud Run](./modules/cloud-run)

View File

@ -77,6 +77,7 @@ These modules are used in the examples included in this repository. If you are u
- [BigQuery dataset](./bigquery-dataset)
- [Bigtable instance](./bigtable-instance)
- [Dataplex](./dataplex)
- [Dataplex DataScan](./dataplex-datascan/)
- [Cloud SQL instance](./cloudsql-instance)
- [Data Catalog Policy Tag](./data-catalog-policy-tag)
- [Datafusion](./datafusion)

View File

@ -0,0 +1,443 @@
# Dataplex DataScan
This module manages the creation of Dataplex DataScan resources.
## Data Profiling
This example shows how to create a Data Profiling scan. To create a Data Profiling scan, provide the `data_profile_spec` input arguments as documented in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataProfileSpec.
```hcl
module "dataplex-datascan" {
source = "./fabric/modules/dataplex-datascan"
name = "datascan"
prefix = "test"
project_id = "my-project-name"
region = "us-central1"
labels = {
billing_id = "a"
}
data = {
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}
data_profile_spec = {
sampling_percent = 100
row_filter = "station_id > 1000"
}
incremental_field = "modified_date"
}
# tftest modules=1 resources=1 inventory=datascan_profiling.yaml
```
## Data Quality
To create a Data Quality scan, provide the `data_quality_spec` input arguments as documented in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec.
Documentation for the supported rule types and rule specifications can be found in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualityRule.
This example shows how to create a Data Quality scan.
```hcl
module "dataplex-datascan" {
source = "./fabric/modules/dataplex-datascan"
name = "datascan"
prefix = "test"
project_id = "my-project-name"
region = "us-central1"
labels = {
billing_id = "a"
}
execution_schedule = "TZ=America/New_York 0 1 * * *"
data = {
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}
incremental_field = "modified_date"
data_quality_spec = {
sampling_percent = 100
row_filter = "station_id > 1000"
rules = [
{
dimension = "VALIDITY"
non_null_expectation = {}
column = "address"
threshold = 0.99
},
{
column = "council_district"
dimension = "VALIDITY"
ignore_null = true
threshold = 0.9
range_expectation = {
min_value = 1
max_value = 10
strict_min_enabled = true
strict_max_enabled = false
}
},
{
column = "council_district"
dimension = "VALIDITY"
threshold = 0.8
range_expectation = {
min_value = 3
max_value = 9
}
},
{
column = "power_type"
dimension = "VALIDITY"
ignore_null = false
regex_expectation = {
regex = ".*solar.*"
}
},
{
column = "property_type"
dimension = "VALIDITY"
ignore_null = false
set_expectation = {
values = ["sidewalk", "parkland"]
}
},
{
column = "address"
dimension = "UNIQUENESS"
uniqueness_expectation = {}
},
{
column = "number_of_docks"
dimension = "VALIDITY"
statistic_range_expectation = {
statistic = "MEAN"
min_value = 5
max_value = 15
strict_min_enabled = true
strict_max_enabled = true
}
},
{
column = "footprint_length"
dimension = "VALIDITY"
row_condition_expectation = {
sql_expression = "footprint_length > 0 AND footprint_length <= 10"
}
},
{
dimension = "VALIDITY"
table_condition_expectation = {
sql_expression = "COUNT(*) > 0"
}
}
]
}
}
# tftest modules=1 resources=1 inventory=datascan_dq.yaml
```
This example shows how you can pass the rules configurations as a separate YAML file into the module. This should produce the same DataScan configuration as the previous example.
```hcl
module "dataplex-datascan" {
source = "./fabric/modules/dataplex-datascan"
name = "datascan"
prefix = "test"
project_id = "my-project-name"
region = "us-central1"
labels = {
billing_id = "a"
}
execution_schedule = "TZ=America/New_York 0 1 * * *"
data = {
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}
incremental_field = "modified_date"
data_quality_spec_file = {
path = "config/data_quality_spec.yaml"
}
}
# tftest modules=1 resources=1 files=data_quality_spec inventory=datascan_dq.yaml
```
The content of the `config/data_quality_spec.yaml` file is as follows:
```yaml
# tftest-file id=data_quality_spec path=config/data_quality_spec.yaml
sampling_percent: 100
row_filter: "station_id > 1000"
rules:
- column: address
dimension: VALIDITY
ignore_null: null
non_null_expectation: {}
threshold: 0.99
- column: council_district
dimension: VALIDITY
ignore_null: true
threshold: 0.9
range_expectation:
max_value: '10'
min_value: '1'
strict_max_enabled: false
strict_min_enabled: true
- column: council_district
dimension: VALIDITY
range_expectation:
max_value: '9'
min_value: '3'
threshold: 0.8
- column: power_type
dimension: VALIDITY
ignore_null: false
regex_expectation:
regex: .*solar.*
- column: property_type
dimension: VALIDITY
ignore_null: false
set_expectation:
values:
- sidewalk
- parkland
- column: address
dimension: UNIQUENESS
uniqueness_expectation: {}
- column: number_of_docks
dimension: VALIDITY
statistic_range_expectation:
max_value: '15'
min_value: '5'
statistic: MEAN
strict_max_enabled: true
strict_min_enabled: true
- column: footprint_length
dimension: VALIDITY
row_condition_expectation:
sql_expression: footprint_length > 0 AND footprint_length <= 10
- dimension: VALIDITY
table_condition_expectation:
sql_expression: COUNT(*) > 0
```
While the module only accepts input in snake_case, the YAML file provided to the `data_quality_spec_file` variable can use either camelCase or snake_case. The example below should also produce the same DataScan configuration as the previous examples.
```hcl
module "dataplex-datascan" {
source = "./fabric/modules/dataplex-datascan"
name = "datascan"
prefix = "test"
project_id = "my-project-name"
region = "us-central1"
labels = {
billing_id = "a"
}
execution_schedule = "TZ=America/New_York 0 1 * * *"
data = {
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}
incremental_field = "modified_date"
data_quality_spec_file = {
path = "config/data_quality_spec_camel_case.yaml"
}
}
# tftest modules=1 resources=1 files=data_quality_spec_camel_case inventory=datascan_dq.yaml
```
The content of the `config/data_quality_spec_camel_case.yaml` file is as follows:
```yaml
# tftest-file id=data_quality_spec_camel_case path=config/data_quality_spec_camel_case.yaml
samplingPercent: 100
rowFilter: "station_id > 1000"
rules:
- column: address
dimension: VALIDITY
ignoreNull: null
nonNullExpectation: {}
threshold: 0.99
- column: council_district
dimension: VALIDITY
ignoreNull: true
threshold: 0.9
rangeExpectation:
maxValue: '10'
minValue: '1'
strictMaxEnabled: false
strictMinEnabled: true
- column: council_district
dimension: VALIDITY
rangeExpectation:
maxValue: '9'
minValue: '3'
threshold: 0.8
- column: power_type
dimension: VALIDITY
ignoreNull: false
regexExpectation:
regex: .*solar.*
- column: property_type
dimension: VALIDITY
ignoreNull: false
setExpectation:
values:
- sidewalk
- parkland
- column: address
dimension: UNIQUENESS
uniquenessExpectation: {}
- column: number_of_docks
dimension: VALIDITY
statisticRangeExpectation:
maxValue: '15'
minValue: '5'
statistic: MEAN
strictMaxEnabled: true
strictMinEnabled: true
- column: footprint_length
dimension: VALIDITY
rowConditionExpectation:
sqlExpression: footprint_length > 0 AND footprint_length <= 10
- dimension: VALIDITY
tableConditionExpectation:
sqlExpression: COUNT(*) > 0
```
## Data Source
The input variable 'data' is required to create a DataScan. This value is immutable. Once it is set, you cannot change the DataScan to another source.
The input variable 'data' should be an object containing a single key-value pair that can be one of:
* `entity`: The Dataplex entity that represents the data source (e.g. BigQuery table) for DataScan, of the form: `projects/{project_number}/locations/{locationId}/lakes/{lakeId}/zones/{zoneId}/entities/{entityId}`.
* `resource`: The service-qualified full resource name of the cloud resource for a DataScan job to scan against. The field could be: BigQuery table of type "TABLE" for DataProfileScan/DataQualityScan format, e.g: `//bigquery.googleapis.com/projects/PROJECT_ID/datasets/DATASET_ID/tables/TABLE_ID`.
The example below shows how to specify the data source for DataScan of type `resource`:
```hcl
module "dataplex-datascan" {
source = "./fabric/modules/dataplex-datascan"
name = "datascan"
prefix = "test"
project_id = "my-project-name"
region = "us-central1"
data = {
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}
data_profile_spec = {}
}
# tftest modules=1 resources=1
```
The example below shows how to specify the data source for DataScan of type `entity`:
```hcl
module "dataplex-datascan" {
source = "./fabric/modules/dataplex-datascan"
name = "datascan"
prefix = "test"
project_id = "my-project-name"
region = "us-central1"
data = {
entity = "projects/<project_number>/locations/<locationId>/lakes/<lakeId>/zones/<zoneId>/entities/<entityId>"
}
data_profile_spec = {}
}
# tftest modules=1 resources=1 inventory=datascan_entity.yaml
```
## Execution Schedule
The input variable 'execution_schedule' specifies when a scan should be triggered, based on a cron schedule expression.
If not specified, the default is `on_demand`, which means the scan will not run until the user calls `dataScans.run` API.
The following example shows how to schedule the DataScan at 1AM every day using the 'America/New_York' timezone.
```hcl
module "dataplex-datascan" {
source = "./fabric/modules/dataplex-datascan"
name = "datascan"
prefix = "test"
project_id = "my-project-name"
region = "us-central1"
execution_schedule = "TZ=America/New_York 0 1 * * *"
data = {
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}
data_profile_spec = {}
}
# tftest modules=1 resources=1 inventory=datascan_cron.yaml
```
## IAM
There are three mutually exclusive ways of managing IAM in this module:
- non-authoritative via the `iam_additive` and `iam_additive_members` variables, where bindings created outside this module will coexist with those managed here
- authoritative via the `group_iam` and `iam` variables, where bindings created outside this module (eg in the console) will be removed at each `terraform apply` cycle if the same role is also managed here
- authoritative policy via the `iam_policy` variable, where any binding created outside this module (eg in the console) will be removed at each `terraform apply` cycle regardless of the role
The authoritative and additive approaches can be used together, provided different roles are managed by each. The IAM policy is incompatible with the other approaches, and must be used with extreme care.
Some care must also be taken with the `group_iam` variable (and in some situations with the additive variables) to ensure that variable keys are static values, so that Terraform is able to compute the dependency graph.
An example is provided below for using the `group_iam` and `iam` variables.
```hcl
module "dataplex-datascan" {
source = "./fabric/modules/dataplex-datascan"
name = "datascan"
prefix = "test"
project_id = "my-project-name"
region = "us-central1"
data = {
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}
data_profile_spec = {}
iam = {
"roles/dataplex.dataScanAdmin" = [
"serviceAccount:svc-1@project-id.iam.gserviceaccount.com"
],
"roles/dataplex.dataScanEditor" = [
"user:admin-user@example.com"
]
}
group_iam = {
"user-group@example.com" = [
"roles/dataplex.dataScanViewer"
]
}
}
# tftest modules=1 resources=4 inventory=datascan_iam.yaml
```
## TODO
<!-- BEGIN TFDOC -->
## Variables
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| [data](variables.tf#L17) | The data source for DataScan. The source can be either a Dataplex `entity` or a BigQuery `resource`. | <code title="object&#40;&#123;&#10; entity &#61; optional&#40;string&#41;&#10; resource &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | |
| [name](variables.tf#L146) | Name of Dataplex Scan. | <code>string</code> | ✓ | |
| [project_id](variables.tf#L157) | The ID of the project where the Dataplex DataScan will be created. | <code>string</code> | ✓ | |
| [region](variables.tf#L162) | Region for the Dataplex DataScan. | <code>string</code> | ✓ | |
| [data_profile_spec](variables.tf#L29) | DataProfileScan related setting. Variable descriptions are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataProfileSpec. | <code title="object&#40;&#123;&#10; sampling_percent &#61; optional&#40;number&#41;&#10; row_filter &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> |
| [data_quality_spec](variables.tf#L38) | DataQualityScan related setting. Variable descriptions are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec. | <code title="object&#40;&#123;&#10; sampling_percent &#61; optional&#40;number&#41;&#10; row_filter &#61; optional&#40;string&#41;&#10; rules &#61; list&#40;object&#40;&#123;&#10; column &#61; optional&#40;string&#41;&#10; ignore_null &#61; optional&#40;bool, null&#41;&#10; dimension &#61; string&#10; threshold &#61; optional&#40;number&#41;&#10; non_null_expectation &#61; optional&#40;object&#40;&#123;&#125;&#41;&#41;&#10; range_expectation &#61; optional&#40;object&#40;&#123;&#10; min_value &#61; optional&#40;number&#41;&#10; max_value &#61; optional&#40;number&#41;&#10; strict_min_enabled &#61; optional&#40;bool&#41;&#10; strict_max_enabled &#61; optional&#40;bool&#41;&#10; &#125;&#41;&#41;&#10; regex_expectation &#61; optional&#40;object&#40;&#123;&#10; regex &#61; string&#10; &#125;&#41;&#41;&#10; set_expectation &#61; optional&#40;object&#40;&#123;&#10; values &#61; list&#40;string&#41;&#10; &#125;&#41;&#41;&#10; uniqueness_expectation &#61; optional&#40;object&#40;&#123;&#125;&#41;&#41;&#10; statistic_range_expectation &#61; optional&#40;object&#40;&#123;&#10; statistic &#61; string&#10; min_value &#61; optional&#40;number&#41;&#10; max_value &#61; optional&#40;number&#41;&#10; strict_min_enabled &#61; optional&#40;bool&#41;&#10; strict_max_enabled &#61; optional&#40;bool&#41;&#10; &#125;&#41;&#41;&#10; row_condition_expectation &#61; optional&#40;object&#40;&#123;&#10; sql_expression &#61; string&#10; &#125;&#41;&#41;&#10; table_condition_expectation &#61; optional&#40;object&#40;&#123;&#10; sql_expression &#61; string&#10; &#125;&#41;&#41;&#10; &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> |
| [data_quality_spec_file](variables.tf#L80) | Path to a YAML file containing DataQualityScan related setting. Input content can use either camelCase or snake_case. Variables description are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec. | <code title="object&#40;&#123;&#10; path &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> |
| [description](variables.tf#L88) | Custom description for DataScan. | <code>string</code> | | <code>null</code> |
| [execution_schedule](variables.tf#L94) | Schedule DataScan to run periodically based on a cron schedule expression. If not specified, the DataScan is created with `on_demand` schedule, which means it will not run until the user calls `dataScans.run` API. | <code>string</code> | | <code>null</code> |
| [group_iam](variables.tf#L100) | Authoritative IAM binding for organization groups, in {GROUP_EMAIL => [ROLES]} format. Group emails need to be static. Can be used in combination with the `iam` variable. | <code>map&#40;list&#40;string&#41;&#41;</code> | | <code>&#123;&#125;</code> |
| [iam](variables.tf#L107) | Dataplex DataScan IAM bindings in {ROLE => [MEMBERS]} format. | <code>map&#40;list&#40;string&#41;&#41;</code> | | <code>&#123;&#125;</code> |
| [iam_additive](variables.tf#L114) | IAM additive bindings in {ROLE => [MEMBERS]} format. | <code>map&#40;list&#40;string&#41;&#41;</code> | | <code>&#123;&#125;</code> |
| [iam_additive_members](variables.tf#L121) | IAM additive bindings in {MEMBERS => [ROLE]} format. This might break if members are dynamic values. | <code>map&#40;list&#40;string&#41;&#41;</code> | | <code>&#123;&#125;</code> |
| [iam_policy](variables.tf#L127) | IAM authoritative policy in {ROLE => [MEMBERS]} format. Roles and members not explicitly listed will be cleared, use with extreme caution. | <code>map&#40;list&#40;string&#41;&#41;</code> | | <code>null</code> |
| [incremental_field](variables.tf#L133) | The unnested field (of type Date or Timestamp) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table. | <code>string</code> | | <code>null</code> |
| [labels](variables.tf#L139) | Resource labels. | <code>map&#40;string&#41;</code> | | <code>&#123;&#125;</code> |
| [prefix](variables.tf#L151) | Optional prefix used to generate Dataplex DataScan ID. | <code>string</code> | | <code>null</code> |
## Outputs
| name | description | sensitive |
|---|---|:---:|
| [data_scan_id](outputs.tf#L17) | Dataplex DataScan ID. | |
| [id](outputs.tf#L22) | A fully qualified Dataplex DataScan identifier for the resource with format projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}. | |
| [name](outputs.tf#L27) | The relative resource name of the scan, of the form: projects/{project}/locations/{locationId}/dataScans/{datascan_id}, where project refers to a project_id or project_number and locationId refers to a GCP region. | |
| [type](outputs.tf#L32) | The type of DataScan. | |
<!-- END TFDOC -->

View File

@ -0,0 +1,89 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
locals {
  # Every role mentioned by at least one group entry, de-duplicated.
  _group_iam_roles = distinct(flatten(values(var.group_iam)))
  # Pivot {GROUP_EMAIL => [ROLES]} into {ROLE => ["group:EMAIL", ...]}.
  # index() raises an error when the role is absent from a group's list, so
  # can() acts as the membership test.
  _group_iam = {
    for role in local._group_iam_roles : role => [
      for email, roles in var.group_iam : "group:${email}" if can(index(roles, role))
    ]
  }
  # Flatten {ROLE => [MEMBERS]} into a list of single role/member pairs.
  _iam_additive_pairs = flatten([
    for r, ms in var.iam_additive : [
      for m in ms : { role = r, member = m }
    ]
  ])
  # Flatten {MEMBER => [ROLES]} into the same role/member pair shape.
  _iam_additive_member_pairs = flatten([
    for m, rs in var.iam_additive_members : [
      for r in rs : { role = r, member = m }
    ]
  ])
  # Authoritative bindings: for each role, members from var.iam followed by
  # the group members derived from var.group_iam.
  iam = {
    for role in distinct(concat(keys(var.iam), keys(local._group_iam))) :
    role => concat(try(var.iam[role], []), try(local._group_iam[role], []))
  }
  # Additive bindings keyed by "ROLE-MEMBER".
  iam_additive = {
    for binding in concat(local._iam_additive_pairs, local._iam_additive_member_pairs) :
    "${binding.role}-${binding.member}" => {
      role   = binding.role
      member = binding.member
    }
  }
}
# Authoritative bindings: one per role key of local.iam, with the merged
# member list as its members.
resource "google_dataplex_datascan_iam_binding" "authoritative_for_role" {
  for_each     = local.iam
  role         = each.key
  members      = each.value
  data_scan_id = google_dataplex_datascan.datascan.data_scan_id
  location     = google_dataplex_datascan.datascan.location
  project      = google_dataplex_datascan.datascan.project
}
# Additive (non-authoritative) members: one resource per role/member pair.
resource "google_dataplex_datascan_iam_member" "additive" {
  # Iterate over nothing when both additive variables are empty.
  for_each = (
    length(var.iam_additive) + length(var.iam_additive_members) == 0
    ? {}
    : local.iam_additive
  )
  role         = each.value.role
  member       = each.value.member
  data_scan_id = google_dataplex_datascan.datascan.data_scan_id
  location     = google_dataplex_datascan.datascan.location
  project      = google_dataplex_datascan.datascan.project
}
# Full authoritative policy, created only when var.iam_policy is set.
resource "google_dataplex_datascan_iam_policy" "authoritative_for_resource" {
  count        = var.iam_policy == null ? 0 : 1
  policy_data  = data.google_iam_policy.authoritative[0].policy_data
  data_scan_id = google_dataplex_datascan.datascan.data_scan_id
  location     = google_dataplex_datascan.datascan.location
  project      = google_dataplex_datascan.datascan.project
}
# Policy document built from var.iam_policy ({ROLE => [MEMBERS]}); only
# instantiated when that variable is set.
data "google_iam_policy" "authoritative" {
  count = var.iam_policy == null ? 0 : 1
  dynamic "binding" {
    for_each = try(var.iam_policy, {})
    iterator = entry
    content {
      role    = entry.key
      members = entry.value
    }
  }
}

View File

@ -0,0 +1,178 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
locals {
  # "<prefix>-" when a non-empty prefix is set, otherwise an empty string.
  prefix = var.prefix != null && var.prefix != "" ? "${var.prefix}-" : ""
  # Data quality spec read from the YAML file; top-level keys are accepted in
  # camelCase first, then snake_case.
  _file_data_quality_spec = var.data_quality_spec_file != null ? {
    sampling_percent = try(local._file_data_quality_spec_raw.samplingPercent, local._file_data_quality_spec_raw.sampling_percent, null)
    row_filter       = try(local._file_data_quality_spec_raw.rowFilter, local._file_data_quality_spec_raw.row_filter, null)
    rules            = local._parsed_rules
  } : null
  # Effective spec: null when neither source is set, otherwise the inline
  # variable merged with the file-derived spec.
  data_quality_spec = (
    var.data_quality_spec == null && var.data_quality_spec_file == null ?
    null :
    merge(var.data_quality_spec, local._file_data_quality_spec)
  )
}
# Dataplex DataScan. The scan carries either a data profile spec or a data
# quality spec; the quality spec comes from local.data_quality_spec, which is
# built from the inline variable or the YAML file.
resource "google_dataplex_datascan" "datascan" {
  project      = var.project_id
  location     = var.region
  data_scan_id = "${local.prefix}${var.name}"
  display_name = "${local.prefix}${var.name}"
  description  = var.description == null ? "Terraform Managed." : "Terraform Managed. ${var.description}"
  labels       = var.labels
  data {
    resource = var.data.resource
    entity   = var.data.entity
  }
  execution_spec {
    field = var.incremental_field
    trigger {
      # Exactly one trigger block is emitted: on_demand when no schedule is
      # set, schedule (with the cron expression) otherwise.
      dynamic "on_demand" {
        for_each = var.execution_schedule == null ? [""] : []
        content {
        }
      }
      dynamic "schedule" {
        for_each = var.execution_schedule != null ? [""] : []
        content {
          cron = var.execution_schedule
        }
      }
    }
  }
  dynamic "data_profile_spec" {
    for_each = var.data_profile_spec != null ? [""] : []
    content {
      sampling_percent = try(var.data_profile_spec.sampling_percent, null)
      row_filter       = try(var.data_profile_spec.row_filter, null)
    }
  }
  dynamic "data_quality_spec" {
    for_each = local.data_quality_spec != null ? [""] : []
    content {
      sampling_percent = try(local.data_quality_spec.sampling_percent, null)
      row_filter       = try(local.data_quality_spec.row_filter, null)
      dynamic "rules" {
        for_each = local.data_quality_spec.rules
        content {
          column      = try(rules.value.column, null)
          ignore_null = try(rules.value.ignore_null, null)
          dimension   = rules.value.dimension
          threshold   = try(rules.value.threshold, null)
          # Each expectation block below is emitted only when the rule sets
          # the matching attribute to a non-null value.
          dynamic "non_null_expectation" {
            for_each = try(rules.value.non_null_expectation, null) != null ? [""] : []
            content {
            }
          }
          dynamic "range_expectation" {
            for_each = try(rules.value.range_expectation, null) != null ? [""] : []
            content {
              min_value          = try(rules.value.range_expectation.min_value, null)
              max_value          = try(rules.value.range_expectation.max_value, null)
              strict_min_enabled = try(rules.value.range_expectation.strict_min_enabled, null)
              strict_max_enabled = try(rules.value.range_expectation.strict_max_enabled, null)
            }
          }
          dynamic "set_expectation" {
            for_each = try(rules.value.set_expectation, null) != null ? [""] : []
            content {
              values = rules.value.set_expectation.values
            }
          }
          dynamic "uniqueness_expectation" {
            for_each = try(rules.value.uniqueness_expectation, null) != null ? [""] : []
            content {
            }
          }
          dynamic "regex_expectation" {
            for_each = try(rules.value.regex_expectation, null) != null ? [""] : []
            content {
              regex = rules.value.regex_expectation.regex
            }
          }
          dynamic "statistic_range_expectation" {
            for_each = try(rules.value.statistic_range_expectation, null) != null ? [""] : []
            content {
              min_value          = try(rules.value.statistic_range_expectation.min_value, null)
              max_value          = try(rules.value.statistic_range_expectation.max_value, null)
              strict_min_enabled = try(rules.value.statistic_range_expectation.strict_min_enabled, null)
              strict_max_enabled = try(rules.value.statistic_range_expectation.strict_max_enabled, null)
              statistic          = rules.value.statistic_range_expectation.statistic
            }
          }
          dynamic "row_condition_expectation" {
            for_each = try(rules.value.row_condition_expectation, null) != null ? [""] : []
            content {
              sql_expression = rules.value.row_condition_expectation.sql_expression
            }
          }
          dynamic "table_condition_expectation" {
            for_each = try(rules.value.table_condition_expectation, null) != null ? [""] : []
            content {
              sql_expression = rules.value.table_condition_expectation.sql_expression
            }
          }
        }
      }
    }
  }
  lifecycle {
    # Exactly one of the three spec inputs must be non-null.
    precondition {
      condition     = length([for spec in [var.data_profile_spec, var.data_quality_spec, var.data_quality_spec_file] : spec if spec != null]) == 1
      error_message = "DataScan can only contain one of 'data_profile_spec', 'data_quality_spec', 'data_quality_spec_file'."
    }
    # Every rule's dimension must be one of the listed values.
    precondition {
      condition = alltrue([
        for rule in try(local.data_quality_spec.rules, []) :
      contains(["COMPLETENESS", "ACCURACY", "CONSISTENCY", "VALIDITY", "UNIQUENESS", "INTEGRITY"], rule.dimension)])
      error_message = "Datascan 'dimension' field in 'data_quality_spec' must be one of ['COMPLETENESS', 'ACCURACY', 'CONSISTENCY', 'VALIDITY', 'UNIQUENESS', 'INTEGRITY']."
    }
    # Every rule must set exactly one expectation attribute to a non-null value.
    precondition {
      condition = alltrue([
        for rule in try(local.data_quality_spec.rules, []) :
        length([
          for k, v in rule :
          v if contains([
            "non_null_expectation",
            "range_expectation",
            "regex_expectation",
            "set_expectation",
            "uniqueness_expectation",
            "statistic_range_expectation",
            "row_condition_expectation",
            "table_condition_expectation"
          ], k) && v != null
      ]) == 1])
      error_message = "Datascan rule must contain a key that is one of ['non_null_expectation', 'range_expectation', 'regex_expectation', 'set_expectation', 'uniqueness_expectation', 'statistic_range_expectation', 'row_condition_expectation', 'table_condition_expectation']."
    }
  }
}

View File

@ -0,0 +1,35 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
output "data_scan_id" {
  value       = google_dataplex_datascan.datascan.data_scan_id
  description = "Dataplex DataScan ID."
}
output "id" {
  value       = google_dataplex_datascan.datascan.id
  description = "A fully qualified Dataplex DataScan identifier for the resource with format projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}."
}
output "name" {
  value       = google_dataplex_datascan.datascan.name
  description = "The relative resource name of the scan, of the form: projects/{project}/locations/{locationId}/dataScans/{datascan_id}, where project refers to a project_id or project_number and locationId refers to a GCP region."
}
output "type" {
  value       = google_dataplex_datascan.datascan.type
  description = "The type of DataScan."
}

View File

@ -0,0 +1,54 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
locals {
  # Decoded content of the optional YAML spec file. When no file is configured
  # an empty map is used, so the rules lookup below falls back to [].
  _file_data_quality_spec_raw = (
    var.data_quality_spec_file == null
    ? tomap({})
    : yamldecode(file(var.data_quality_spec_file.path))
  )

  # Rules from the file normalized to snake_case attribute names. Each lookup
  # tries the camelCase spelling first, then snake_case, then falls back to
  # null. Expectation objects are only built when one of the two spellings of
  # the key is present on the rule; otherwise the attribute is null.
  _parsed_rules = [
    for r in try(local._file_data_quality_spec_raw.rules, []) : {
      column      = try(r.column, null)
      ignore_null = try(r.ignoreNull, r.ignore_null, null)
      # dimension is read directly: a rule without it is an error.
      dimension = r.dimension
      threshold = try(r.threshold, null)

      non_null_expectation   = try(r.nonNullExpectation, r.non_null_expectation, null)
      uniqueness_expectation = try(r.uniquenessExpectation, r.uniqueness_expectation, null)

      range_expectation = (can(r.rangeExpectation) || can(r.range_expectation)) ? {
        min_value          = try(r.rangeExpectation.minValue, r.range_expectation.min_value, null)
        max_value          = try(r.rangeExpectation.maxValue, r.range_expectation.max_value, null)
        strict_min_enabled = try(r.rangeExpectation.strictMinEnabled, r.range_expectation.strict_min_enabled, null)
        strict_max_enabled = try(r.rangeExpectation.strictMaxEnabled, r.range_expectation.strict_max_enabled, null)
      } : null

      regex_expectation = (can(r.regexExpectation) || can(r.regex_expectation)) ? {
        regex = try(r.regexExpectation.regex, r.regex_expectation.regex, null)
      } : null

      set_expectation = (can(r.setExpectation) || can(r.set_expectation)) ? {
        values = try(r.setExpectation.values, r.set_expectation.values, null)
      } : null

      statistic_range_expectation = (can(r.statisticRangeExpectation) || can(r.statistic_range_expectation)) ? {
        # No null fallback here: try() fails when neither spelling provides
        # a statistic.
        statistic          = try(r.statisticRangeExpectation.statistic, r.statistic_range_expectation.statistic)
        min_value          = try(r.statisticRangeExpectation.minValue, r.statistic_range_expectation.min_value, null)
        max_value          = try(r.statisticRangeExpectation.maxValue, r.statistic_range_expectation.max_value, null)
        strict_min_enabled = try(r.statisticRangeExpectation.strictMinEnabled, r.statistic_range_expectation.strict_min_enabled, null)
        strict_max_enabled = try(r.statisticRangeExpectation.strictMaxEnabled, r.statistic_range_expectation.strict_max_enabled, null)
      } : null

      row_condition_expectation = (can(r.rowConditionExpectation) || can(r.row_condition_expectation)) ? {
        sql_expression = try(r.rowConditionExpectation.sqlExpression, r.row_condition_expectation.sql_expression, null)
      } : null

      table_condition_expectation = (can(r.tableConditionExpectation) || can(r.table_condition_expectation)) ? {
        sql_expression = try(r.tableConditionExpectation.sqlExpression, r.table_condition_expectation.sql_expression, null)
      } : null
    }
  ]
}

View File

@ -0,0 +1,165 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
# Required input: the scan source. The validation accepts the value only when
# exactly one of the two attributes is non-null.
variable "data" {
  description = "The data source for DataScan. The source can be either a Dataplex `entity` or a BigQuery `resource`."
  type = object({
    entity   = optional(string)
    resource = optional(string)
  })
  validation {
    condition = length([
      for v in [var.data.entity, var.data.resource] : v if v != null
    ]) == 1
    error_message = "Datascan data must specify one of 'entity', 'resource'."
  }
}
# Optional data profile settings; null by default.
variable "data_profile_spec" {
  description = "DataProfileScan related setting. Variable descriptions are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataProfileSpec."
  type = object({
    sampling_percent = optional(number)
    row_filter       = optional(string)
  })
  default = null
}
# Optional inline data quality settings; null by default. When set, `rules`
# is required and each rule must carry a `dimension`; the expectation
# attributes are all optional at the type level.
variable "data_quality_spec" {
  description = "DataQualityScan related setting. Variable descriptions are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec."
  type = object({
    sampling_percent = optional(number)
    row_filter       = optional(string)
    rules = list(object({
      column      = optional(string)
      ignore_null = optional(bool)
      dimension   = string
      threshold   = optional(number)
      # Marker expectations: an empty object enables them.
      non_null_expectation   = optional(object({}))
      uniqueness_expectation = optional(object({}))
      range_expectation = optional(object({
        min_value          = optional(number)
        max_value          = optional(number)
        strict_min_enabled = optional(bool)
        strict_max_enabled = optional(bool)
      }))
      regex_expectation = optional(object({
        regex = string
      }))
      set_expectation = optional(object({
        values = list(string)
      }))
      statistic_range_expectation = optional(object({
        statistic          = string
        min_value          = optional(number)
        max_value          = optional(number)
        strict_min_enabled = optional(bool)
        strict_max_enabled = optional(bool)
      }))
      row_condition_expectation = optional(object({
        sql_expression = string
      }))
      table_condition_expectation = optional(object({
        sql_expression = string
      }))
    }))
  })
  default = null
}
# Optional reference to a YAML file holding the data quality settings; null by
# default. Wrapped in an object with a single required `path` attribute.
variable "data_quality_spec_file" {
  description = "Path to a YAML file containing DataQualityScan related setting. Input content can use either camelCase or snake_case. Variables description are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec."
  type = object({
    path = string
  })
  default = null
}
# Optional free-text description; null by default.
variable "description" {
  description = "Custom description for DataScan."
  type        = string
  default     = null
}
# Optional cron expression; null by default.
variable "execution_schedule" {
  description = "Schedule DataScan to run periodically based on a cron schedule expression. If not specified, the DataScan is created with `on_demand` schedule, which means it will not run until the user calls `dataScans.run` API."
  type        = string
  default     = null
}
# Group-keyed IAM bindings; defaults to an empty map and cannot be null.
variable "group_iam" {
  description = "Authoritative IAM binding for organization groups, in {GROUP_EMAIL => [ROLES]} format. Group emails need to be static. Can be used in combination with the `iam` variable."
  type        = map(list(string))
  default     = {}
  nullable    = false
}
# Role-keyed IAM bindings; defaults to an empty map and cannot be null.
variable "iam" {
  description = "Dataplex DataScan IAM bindings in {ROLE => [MEMBERS]} format."
  type        = map(list(string))
  default     = {}
  nullable    = false
}
# Role-keyed additive IAM bindings; defaults to an empty map and cannot be
# null.
variable "iam_additive" {
  description = "IAM additive bindings in {ROLE => [MEMBERS]} format."
  type        = map(list(string))
  default     = {}
  nullable    = false
}
# Member-keyed additive IAM bindings; defaults to an empty map.
variable "iam_additive_members" {
  description = "IAM additive bindings in {MEMBERS => [ROLE]} format. This might break if members are dynamic values."
  type        = map(list(string))
  default     = {}
  # Same as the other map-typed IAM inputs: an explicit null from the caller
  # resolves to the empty default instead of reaching the module as null.
  nullable = false
}
# Optional authoritative policy; null by default.
variable "iam_policy" {
  description = "IAM authoritative policy in {ROLE => [MEMBERS]} format. Roles and members not explicitly listed will be cleared, use with extreme caution."
  type        = map(list(string))
  default     = null
}
# Optional field name for incremental scans; null by default.
variable "incremental_field" {
  description = "The unnested field (of type Date or Timestamp) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table."
  type        = string
  default     = null
}
# Resource labels; defaults to an empty map and cannot be null.
variable "labels" {
  description = "Resource labels."
  type        = map(string)
  default     = {}
  nullable    = false
}
# Required input: no default is declared.
variable "name" {
  description = "Name of Dataplex Scan."
  type        = string
}
# Optional prefix; null by default.
variable "prefix" {
  description = "Optional prefix used to generate Dataplex DataScan ID."
  type        = string
  default     = null
}
# Required input: no default is declared.
variable "project_id" {
  description = "The ID of the project where the Dataplex DataScan will be created."
  type        = string
}
# Required input: no default is declared.
variable "region" {
  description = "Region for the Dataplex DataScan."
  type        = string
}

View File

@ -0,0 +1,27 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Minimum Terraform and provider versions for this module. The trailing
# `# tftest` markers on the version lines are kept verbatim.
terraform {
  required_version = ">= 1.4.4"
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = ">= 4.71.0" # tftest
    }
    google-beta = {
      source  = "hashicorp/google-beta"
      version = ">= 4.71.0" # tftest
    }
  }
}

View File

@ -0,0 +1,116 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Identity and placement of the DataScan under test.
name       = "datascan"
prefix     = "test"
project_id = "test-project"
region     = "us-central1"

labels = {
  billing_id = "a"
}

# Role => members bindings.
iam = {
  "roles/dataplex.dataScanViewer" = [
    "user:user@example.com",
  ]
  "roles/dataplex.dataScanEditor" = [
    "user:user@example.com",
  ]
}

# Group email => roles bindings.
group_iam = {
  "user-group@example.com" = [
    "roles/dataplex.dataScanEditor",
  ]
}

# Scheduled run, with the time zone given inline in the cron expression.
execution_schedule = "TZ=America/New_York 1 1 * * *"

# BigQuery table used as the scan source.
data = {
  resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}

description       = "Custom description."
incremental_field = "modified_date"
# Data quality settings with one rule per supported expectation type, plus a
# second range rule that sets only the lower bound.
data_quality_spec = {
  sampling_percent = 100
  row_filter       = "station_id > 1000"
  rules = [
    # non-null
    {
      column               = "address"
      dimension            = "VALIDITY"
      threshold            = 0.99
      non_null_expectation = {}
    },
    # range, both bounds and strictness flags set
    {
      column      = "council_district"
      dimension   = "VALIDITY"
      ignore_null = true
      threshold   = 0.9
      range_expectation = {
        min_value          = 1
        max_value          = 10
        strict_min_enabled = true
        strict_max_enabled = false
      }
    },
    # range, lower bound only
    {
      column    = "council_district"
      dimension = "VALIDITY"
      threshold = 0.8
      range_expectation = {
        min_value = 3
      }
    },
    # regex
    {
      column      = "power_type"
      dimension   = "VALIDITY"
      ignore_null = false
      regex_expectation = {
        regex = ".*solar.*"
      }
    },
    # set membership
    {
      column      = "property_type"
      dimension   = "VALIDITY"
      ignore_null = false
      set_expectation = {
        values = ["sidewalk", "parkland"]
      }
    },
    # uniqueness
    {
      column                 = "address"
      dimension              = "UNIQUENESS"
      uniqueness_expectation = {}
    },
    # statistic range
    {
      column    = "number_of_docks"
      dimension = "VALIDITY"
      statistic_range_expectation = {
        statistic          = "MEAN"
        min_value          = 5
        max_value          = 15
        strict_min_enabled = true
        strict_max_enabled = true
      }
    },
    # row-level SQL condition
    {
      column    = "footprint_length"
      dimension = "VALIDITY"
      row_condition_expectation = {
        sql_expression = "footprint_length > 0 AND footprint_length <= 10"
      }
    },
    # table-level SQL condition (no column)
    {
      dimension = "VALIDITY"
      table_condition_expectation = {
        sql_expression = "COUNT(*) > 0"
      }
    }
  ]
}

View File

@ -0,0 +1,195 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
values:
google_dataplex_datascan.datascan:
data:
- entity: null
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations
data_profile_spec: []
data_quality_spec:
- row_filter: station_id > 1000
rules:
- column: address
dimension: VALIDITY
ignore_null: null
non_null_expectation:
- {}
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.99
uniqueness_expectation: []
- column: council_district
dimension: VALIDITY
ignore_null: true
non_null_expectation: []
range_expectation:
- max_value: '10'
min_value: '1'
strict_max_enabled: false
strict_min_enabled: true
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.9
uniqueness_expectation: []
- column: council_district
dimension: VALIDITY
ignore_null: null
non_null_expectation: []
range_expectation:
- max_value: null
min_value: '3'
strict_max_enabled: false
strict_min_enabled: false
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.8
uniqueness_expectation: []
- column: power_type
dimension: VALIDITY
ignore_null: false
non_null_expectation: []
range_expectation: []
regex_expectation:
- regex: .*solar.*
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
- column: property_type
dimension: VALIDITY
ignore_null: false
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation:
- values:
- sidewalk
- parkland
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
- column: address
dimension: UNIQUENESS
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation:
- {}
- column: number_of_docks
dimension: VALIDITY
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation:
- max_value: '15'
min_value: '5'
statistic: MEAN
strict_max_enabled: true
strict_min_enabled: true
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
- column: footprint_length
dimension: VALIDITY
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation:
- sql_expression: footprint_length > 0 AND footprint_length <= 10
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
- column: null
dimension: VALIDITY
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation:
- sql_expression: COUNT(*) > 0
threshold: null
uniqueness_expectation: []
sampling_percent: 100
data_scan_id: test-datascan
description: Terraform Managed. Custom description.
display_name: test-datascan
execution_spec:
- field: modified_date
trigger:
- on_demand: []
schedule:
- cron: TZ=America/New_York 1 1 * * *
labels:
billing_id: a
location: us-central1
project: test-project
timeouts: null
google_dataplex_datascan_iam_binding.authoritative_for_role["roles/dataplex.dataScanEditor"]:
condition: []
data_scan_id: test-datascan
location: us-central1
members:
- group:user-group@example.com
- user:user@example.com
project: test-project
role: roles/dataplex.dataScanEditor
google_dataplex_datascan_iam_binding.authoritative_for_role["roles/dataplex.dataScanViewer"]:
condition: []
data_scan_id: test-datascan
location: us-central1
members:
- user:user@example.com
project: test-project
role: roles/dataplex.dataScanViewer
counts:
google_dataplex_datascan: 1
google_dataplex_datascan_iam_binding: 2
modules: 0
resources: 3
outputs:
data_scan_id: test-datascan
id: __missing__
name: __missing__
type: __missing__

View File

@ -0,0 +1,42 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
values:
module.dataplex-datascan.google_dataplex_datascan.datascan:
data:
- entity: null
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations
data_profile_spec:
- row_filter: null
sampling_percent: null
data_quality_spec: []
data_scan_id: test-datascan
description: Terraform Managed.
display_name: test-datascan
execution_spec:
- field: null
trigger:
- on_demand: []
schedule:
- cron: TZ=America/New_York 0 1 * * *
labels: null
location: us-central1
project: my-project-name
timeouts: null
counts:
google_dataplex_datascan: 1
modules: 1
resources: 1
outputs: {}

View File

@ -0,0 +1,173 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
values:
module.dataplex-datascan.google_dataplex_datascan.datascan:
data:
- entity: null
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations
data_profile_spec: []
data_quality_spec:
- row_filter: station_id > 1000
rules:
- column: address
dimension: VALIDITY
ignore_null: null
non_null_expectation:
- {}
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.99
uniqueness_expectation: []
- column: council_district
dimension: VALIDITY
ignore_null: true
non_null_expectation: []
range_expectation:
- max_value: '10'
min_value: '1'
strict_max_enabled: false
strict_min_enabled: true
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.9
uniqueness_expectation: []
# Range rule with both bounds set; booleans written in canonical lowercase
# like every other boolean in this inventory.
- column: council_district
  dimension: VALIDITY
  ignore_null: null
  non_null_expectation: []
  range_expectation:
  - max_value: '9'
    min_value: '3'
    strict_max_enabled: false
    strict_min_enabled: false
  regex_expectation: []
  row_condition_expectation: []
  set_expectation: []
  statistic_range_expectation: []
  table_condition_expectation: []
  threshold: 0.8
  uniqueness_expectation: []
- column: power_type
dimension: VALIDITY
ignore_null: false
non_null_expectation: []
range_expectation: []
regex_expectation:
- regex: .*solar.*
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
- column: property_type
dimension: VALIDITY
ignore_null: false
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation:
- values:
- sidewalk
- parkland
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
- column: address
dimension: UNIQUENESS
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation:
- {}
- column: number_of_docks
dimension: VALIDITY
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation:
- max_value: '15'
min_value: '5'
statistic: MEAN
strict_max_enabled: true
strict_min_enabled: true
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
- column: footprint_length
dimension: VALIDITY
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation:
- sql_expression: footprint_length > 0 AND footprint_length <= 10
set_expectation: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
- column: null
dimension: VALIDITY
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
statistic_range_expectation: []
table_condition_expectation:
- sql_expression: COUNT(*) > 0
threshold: null
uniqueness_expectation: []
sampling_percent: 100
data_scan_id: test-datascan
description: Terraform Managed.
display_name: test-datascan
execution_spec:
- field: modified_date
trigger:
- on_demand: []
schedule:
- cron: TZ=America/New_York 0 1 * * *
labels:
billing_id: a
location: us-central1
project: my-project-name
timeouts: null
counts:
google_dataplex_datascan: 1
modules: 1
resources: 1
outputs: {}

View File

@ -0,0 +1,41 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
values:
module.dataplex-datascan.google_dataplex_datascan.datascan:
data:
- entity: projects/<project_number>/locations/<locationId>/lakes/<lakeId>/zones/<zoneId>/entities/<entityId>
resource: null
data_profile_spec:
- row_filter: null
sampling_percent: null
data_quality_spec: []
data_scan_id: test-datascan
description: Terraform Managed.
display_name: test-datascan
execution_spec:
- field: null
trigger:
- on_demand:
- {}
schedule: []
location: us-central1
project: my-project-name
timeouts: null
counts:
google_dataplex_datascan: 1
modules: 1
resources: 1
outputs: {}

View File

@ -0,0 +1,67 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
values:
module.dataplex-datascan.google_dataplex_datascan.datascan:
data:
- entity: null
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations
data_profile_spec:
- row_filter: null
sampling_percent: null
data_quality_spec: []
data_scan_id: test-datascan
description: Terraform Managed.
display_name: test-datascan
execution_spec:
- field: null
trigger:
- on_demand:
- {}
schedule: []
labels: null
location: us-central1
project: my-project-name
timeouts: null
module.dataplex-datascan.google_dataplex_datascan_iam_binding.authoritative_for_role["roles/dataplex.dataScanAdmin"]:
condition: []
data_scan_id: test-datascan
location: us-central1
members:
- serviceAccount:svc-1@project-id.iam.gserviceaccount.com
project: my-project-name
role: roles/dataplex.dataScanAdmin
module.dataplex-datascan.google_dataplex_datascan_iam_binding.authoritative_for_role["roles/dataplex.dataScanEditor"]:
condition: []
data_scan_id: test-datascan
location: us-central1
members:
- user:admin-user@example.com
project: my-project-name
role: roles/dataplex.dataScanEditor
module.dataplex-datascan.google_dataplex_datascan_iam_binding.authoritative_for_role["roles/dataplex.dataScanViewer"]:
condition: []
data_scan_id: test-datascan
location: us-central1
members:
- group:user-group@example.com
project: my-project-name
role: roles/dataplex.dataScanViewer
counts:
google_dataplex_datascan: 1
google_dataplex_datascan_iam_binding: 3
modules: 1
resources: 4
outputs: {}

View File

@ -0,0 +1,43 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
values:
module.dataplex-datascan.google_dataplex_datascan.datascan:
data:
- entity: null
resource: //bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations
data_profile_spec:
- row_filter: station_id > 1000
sampling_percent: 100
data_quality_spec: []
data_scan_id: test-datascan
description: Terraform Managed.
display_name: test-datascan
execution_spec:
- field: modified_date
trigger:
- on_demand:
- {}
schedule: []
labels:
billing_id: a
location: us-central1
project: my-project-name
timeouts: null
counts:
google_dataplex_datascan: 1
modules: 1
resources: 1
outputs: {}

View File

@ -0,0 +1,16 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
module: modules/dataplex-datascan
tests:
datascan_test_inputs: