diff --git a/blueprints/data-solutions/data-platform-minimal/01-landing.tf b/blueprints/data-solutions/data-platform-minimal/01-landing.tf
index 94ecf5a3..52bf6e8a 100644
--- a/blueprints/data-solutions/data-platform-minimal/01-landing.tf
+++ b/blueprints/data-solutions/data-platform-minimal/01-landing.tf
@@ -64,6 +64,7 @@ module "land-project" {
"bigquerystorage.googleapis.com",
"cloudkms.googleapis.com",
"cloudresourcemanager.googleapis.com",
+ "datalineage.googleapis.com",
"iam.googleapis.com",
"serviceusage.googleapis.com",
"stackdriver.googleapis.com",
diff --git a/blueprints/data-solutions/data-platform-minimal/02-composer.tf b/blueprints/data-solutions/data-platform-minimal/02-composer.tf
index de88af57..c250b1fd 100644
--- a/blueprints/data-solutions/data-platform-minimal/02-composer.tf
+++ b/blueprints/data-solutions/data-platform-minimal/02-composer.tf
@@ -51,16 +51,20 @@ module "processing-sa-cmp-0" {
}
resource "google_composer_environment" "processing-cmp-0" {
- count = var.enable_services.composer == true ? 1 : 0
- project = module.processing-project.project_id
- name = "${var.prefix}-prc-cmp-0"
- region = var.region
+ count = var.enable_services.composer == true ? 1 : 0
+ provider = google-beta
+ project = module.processing-project.project_id
+ name = "${var.prefix}-prc-cmp-0"
+ region = var.region
config {
software_config {
airflow_config_overrides = var.composer_config.software_config.airflow_config_overrides
pypi_packages = var.composer_config.software_config.pypi_packages
env_variables = local.env_variables
image_version = var.composer_config.software_config.image_version
+ cloud_data_lineage_integration {
+ enabled = var.composer_config.software_config.cloud_data_lineage_integration
+ }
}
workloads_config {
scheduler {
diff --git a/blueprints/data-solutions/data-platform-minimal/02-processing.tf b/blueprints/data-solutions/data-platform-minimal/02-processing.tf
index 1bba98da..720e2a81 100644
--- a/blueprints/data-solutions/data-platform-minimal/02-processing.tf
+++ b/blueprints/data-solutions/data-platform-minimal/02-processing.tf
@@ -118,6 +118,7 @@ module "processing-project" {
"compute.googleapis.com",
"container.googleapis.com",
"dataflow.googleapis.com",
+ "datalineage.googleapis.com",
"dataproc.googleapis.com",
"iam.googleapis.com",
"servicenetworking.googleapis.com",
diff --git a/blueprints/data-solutions/data-platform-minimal/03-curated.tf b/blueprints/data-solutions/data-platform-minimal/03-curated.tf
index 8bff815f..53a6e7b2 100644
--- a/blueprints/data-solutions/data-platform-minimal/03-curated.tf
+++ b/blueprints/data-solutions/data-platform-minimal/03-curated.tf
@@ -22,6 +22,7 @@ locals {
"cloudkms.googleapis.com",
"cloudresourcemanager.googleapis.com",
"compute.googleapis.com",
+ "datalineage.googleapis.com",
"iam.googleapis.com",
"servicenetworking.googleapis.com",
"serviceusage.googleapis.com",
diff --git a/blueprints/data-solutions/data-platform-minimal/README.md b/blueprints/data-solutions/data-platform-minimal/README.md
index 1f4eb777..62b30acd 100644
--- a/blueprints/data-solutions/data-platform-minimal/README.md
+++ b/blueprints/data-solutions/data-platform-minimal/README.md
@@ -229,7 +229,7 @@ module "data-platform" {
prefix = "myprefix"
}
-# tftest modules=23 resources=135
+# tftest modules=23 resources=138
```
## Customizations
@@ -302,19 +302,19 @@ The application layer is out of scope of this script. As a demo purpuse only, on
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
-| [organization_domain](variables.tf#L122) | Organization domain. | string
| ✓ | |
-| [prefix](variables.tf#L127) | Prefix used for resource names. | string
| ✓ | |
-| [project_config](variables.tf#L136) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…})
| ✓ | |
-| [composer_config](variables.tf#L17) | Cloud Composer config. | object({…})
| | {}
|
-| [data_catalog_tags](variables.tf#L55) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(object({…}))
| | {…}
|
-| [data_force_destroy](variables.tf#L69) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool
| | false
|
-| [enable_services](variables.tf#L75) | Flag to enable or disable services in the Data Platform. | object({…})
| | {}
|
-| [groups](variables.tf#L84) | User groups. | map(string)
| | {…}
|
-| [location](variables.tf#L94) | Location used for multi-regional resources. | string
| | "eu"
|
-| [network_config](variables.tf#L100) | Shared VPC network configurations to use. If null networks will be created in projects. | object({…})
| | {}
|
-| [project_suffix](variables.tf#L160) | Suffix used only for project ids. | string
| | null
|
-| [region](variables.tf#L166) | Region used for regional resources. | string
| | "europe-west1"
|
-| [service_encryption_keys](variables.tf#L172) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…})
| | {}
|
+| [organization_domain](variables.tf#L123) | Organization domain. | string
| ✓ | |
+| [prefix](variables.tf#L128) | Prefix used for resource names. | string
| ✓ | |
+| [project_config](variables.tf#L137) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…})
| ✓ | |
+| [composer_config](variables.tf#L17) | Cloud Composer config. | object({…})
| | {}
|
+| [data_catalog_tags](variables.tf#L56) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | map(object({…}))
| | {…}
|
+| [data_force_destroy](variables.tf#L70) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | bool
| | false
|
+| [enable_services](variables.tf#L76) | Flag to enable or disable services in the Data Platform. | object({…})
| | {}
|
+| [groups](variables.tf#L85) | User groups. | map(string)
| | {…}
|
+| [location](variables.tf#L95) | Location used for multi-regional resources. | string
| | "eu"
|
+| [network_config](variables.tf#L101) | Shared VPC network configurations to use. If null networks will be created in projects. | object({…})
| | {}
|
+| [project_suffix](variables.tf#L161) | Suffix used only for project ids. | string
| | null
|
+| [region](variables.tf#L167) | Region used for regional resources. | string
| | "europe-west1"
|
+| [service_encryption_keys](variables.tf#L173) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…})
| | {}
|
## Outputs
diff --git a/blueprints/data-solutions/data-platform-minimal/demo/README.md b/blueprints/data-solutions/data-platform-minimal/demo/README.md
index b9a24b82..f3c1cbf7 100644
--- a/blueprints/data-solutions/data-platform-minimal/demo/README.md
+++ b/blueprints/data-solutions/data-platform-minimal/demo/README.md
@@ -54,5 +54,5 @@ source ./env.sh
gsutil -i $LND_SA cp demo/data/*.csv gs://$LND_GCS
gsutil -i $CMP_SA cp demo/data/*.j* gs://$PRC_GCS
gsutil -i $CMP_SA cp demo/pyspark_* gs://$PRC_GCS
-gsutil -i $CMP_SA cp demo/dag_*.py $CMP_GCS
+gsutil -i $CMP_SA cp demo/dag_*.py gs://$CMP_GCS/dags
```
diff --git a/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py b/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py
index 1f89fd80..321071b2 100644
--- a/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py
+++ b/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py
@@ -92,7 +92,7 @@ with models.DAG(
schema_update_options=['ALLOW_FIELD_RELAXATION', 'ALLOW_FIELD_ADDITION'],
schema_object="customers.json",
schema_object_bucket=PROCESSING_GCS[5:],
- project_id=PROCESSING_PRJ, # The process will continue to run on the dataset project until the Apache Airflow bug is fixed. https://github.com/apache/airflow/issues/32106
+ project_id=PROCESSING_PRJ,
impersonation_chain=[PROCESSING_SA]
)
diff --git a/blueprints/data-solutions/data-platform-minimal/demo/dag_dataproc_gcs2bq.py b/blueprints/data-solutions/data-platform-minimal/demo/dag_dataproc_gcs2bq.py
index 8b1d30e7..3a3dab52 100644
--- a/blueprints/data-solutions/data-platform-minimal/demo/dag_dataproc_gcs2bq.py
+++ b/blueprints/data-solutions/data-platform-minimal/demo/dag_dataproc_gcs2bq.py
@@ -34,7 +34,6 @@ CURATED_GCS = Variable.get("CURATED_GCS")
CURATED_PRJ = Variable.get("CURATED_PRJ")
DP_KMS_KEY = Variable.get("DP_KMS_KEY", "")
DP_REGION = Variable.get("DP_REGION")
-GCP_REGION = Variable.get("GCP_REGION")
LAND_PRJ = Variable.get("LAND_PRJ")
LAND_BQ_DATASET = Variable.get("LAND_BQ_DATASET")
LAND_GCS = Variable.get("LAND_GCS")
diff --git a/blueprints/data-solutions/data-platform-minimal/demo/dag_orchestrate_pyspark.py b/blueprints/data-solutions/data-platform-minimal/demo/dag_orchestrate_pyspark.py
index b96f27e4..4258e7e4 100644
--- a/blueprints/data-solutions/data-platform-minimal/demo/dag_orchestrate_pyspark.py
+++ b/blueprints/data-solutions/data-platform-minimal/demo/dag_orchestrate_pyspark.py
@@ -33,7 +33,6 @@ CURATED_GCS = Variable.get("CURATED_GCS")
CURATED_PRJ = Variable.get("CURATED_PRJ")
DP_KMS_KEY = Variable.get("DP_KMS_KEY", "")
DP_REGION = Variable.get("DP_REGION")
-GCP_REGION = Variable.get("GCP_REGION")
LAND_PRJ = Variable.get("LAND_PRJ")
LAND_BQ_DATASET = Variable.get("LAND_BQ_DATASET")
LAND_GCS = Variable.get("LAND_GCS")
diff --git a/blueprints/data-solutions/data-platform-minimal/variables.tf b/blueprints/data-solutions/data-platform-minimal/variables.tf
index 0dc29003..0bd1deed 100644
--- a/blueprints/data-solutions/data-platform-minimal/variables.tf
+++ b/blueprints/data-solutions/data-platform-minimal/variables.tf
@@ -19,10 +19,11 @@ variable "composer_config" {
type = object({
environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL")
software_config = optional(object({
- airflow_config_overrides = optional(map(string), {})
- pypi_packages = optional(map(string), {})
- env_variables = optional(map(string), {})
- image_version = optional(string, "composer-2-airflow-2")
+ airflow_config_overrides = optional(map(string), {})
+ pypi_packages = optional(map(string), {})
+ env_variables = optional(map(string), {})
+ image_version = optional(string, "composer-2-airflow-2")
+ cloud_data_lineage_integration = optional(bool, true)
}), {})
web_server_access_control = optional(map(string), {})
workloads_config = optional(object({