diff --git a/blueprints/data-solutions/data-platform-minimal/01-landing.tf b/blueprints/data-solutions/data-platform-minimal/01-landing.tf index 94ecf5a3..52bf6e8a 100644 --- a/blueprints/data-solutions/data-platform-minimal/01-landing.tf +++ b/blueprints/data-solutions/data-platform-minimal/01-landing.tf @@ -64,6 +64,7 @@ module "land-project" { "bigquerystorage.googleapis.com", "cloudkms.googleapis.com", "cloudresourcemanager.googleapis.com", + "datalineage.googleapis.com", "iam.googleapis.com", "serviceusage.googleapis.com", "stackdriver.googleapis.com", diff --git a/blueprints/data-solutions/data-platform-minimal/02-composer.tf b/blueprints/data-solutions/data-platform-minimal/02-composer.tf index de88af57..c250b1fd 100644 --- a/blueprints/data-solutions/data-platform-minimal/02-composer.tf +++ b/blueprints/data-solutions/data-platform-minimal/02-composer.tf @@ -51,16 +51,20 @@ module "processing-sa-cmp-0" { } resource "google_composer_environment" "processing-cmp-0" { - count = var.enable_services.composer == true ? 1 : 0 - project = module.processing-project.project_id - name = "${var.prefix}-prc-cmp-0" - region = var.region + count = var.enable_services.composer == true ? 1 : 0 + provider = google-beta + project = module.processing-project.project_id + name = "${var.prefix}-prc-cmp-0" + region = var.region config { software_config { airflow_config_overrides = var.composer_config.software_config.airflow_config_overrides pypi_packages = var.composer_config.software_config.pypi_packages env_variables = local.env_variables image_version = var.composer_config.software_config.image_version + cloud_data_lineage_integration { + enabled = var.composer_config.software_config.cloud_data_lineage_integration + } } workloads_config { scheduler { diff --git a/blueprints/data-solutions/data-platform-minimal/02-processing.tf b/blueprints/data-solutions/data-platform-minimal/02-processing.tf index 1bba98da..720e2a81 100644 --- a/blueprints/data-solutions/data-platform-minimal/02-processing.tf +++ b/blueprints/data-solutions/data-platform-minimal/02-processing.tf @@ -118,6 +118,7 @@ module "processing-project" { "compute.googleapis.com", "container.googleapis.com", "dataflow.googleapis.com", + "datalineage.googleapis.com", "dataproc.googleapis.com", "iam.googleapis.com", "servicenetworking.googleapis.com", diff --git a/blueprints/data-solutions/data-platform-minimal/03-curated.tf b/blueprints/data-solutions/data-platform-minimal/03-curated.tf index 8bff815f..53a6e7b2 100644 --- a/blueprints/data-solutions/data-platform-minimal/03-curated.tf +++ b/blueprints/data-solutions/data-platform-minimal/03-curated.tf @@ -22,6 +22,7 @@ locals { "cloudkms.googleapis.com", "cloudresourcemanager.googleapis.com", "compute.googleapis.com", + "datalineage.googleapis.com", "iam.googleapis.com", "servicenetworking.googleapis.com", "serviceusage.googleapis.com", diff --git a/blueprints/data-solutions/data-platform-minimal/README.md b/blueprints/data-solutions/data-platform-minimal/README.md index 1f4eb777..62b30acd 100644 --- a/blueprints/data-solutions/data-platform-minimal/README.md +++ b/blueprints/data-solutions/data-platform-minimal/README.md @@ -229,7 +229,7 @@ module "data-platform" { prefix = "myprefix" } -# tftest modules=23 resources=135 +# tftest modules=23 resources=138 ``` ## Customizations @@ -302,19 +302,19 @@ The application layer is out of scope of this script. As a demo purpuse only, on | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| [organization_domain](variables.tf#L122) | Organization domain. | string | ✓ | | -| [prefix](variables.tf#L127) | Prefix used for resource names. | string | ✓ | | -| [project_config](variables.tf#L136) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | ✓ | | -| [composer_config](variables.tf#L17) | Cloud Composer config. | object({…}) | | {} | -| [data_catalog_tags](variables.tf#L55) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(object({…})) | | {…} | -| [data_force_destroy](variables.tf#L69) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | -| [enable_services](variables.tf#L75) | Flag to enable or disable services in the Data Platform. | object({…}) | | {} | -| [groups](variables.tf#L84) | User groups. | map(string) | | {…} | -| [location](variables.tf#L94) | Location used for multi-regional resources. | string | | "eu" | -| [network_config](variables.tf#L100) | Shared VPC network configurations to use. If null networks will be created in projects. | object({…}) | | {} | -| [project_suffix](variables.tf#L160) | Suffix used only for project ids. | string | | null | -| [region](variables.tf#L166) | Region used for regional resources. | string | | "europe-west1" | -| [service_encryption_keys](variables.tf#L172) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | {} | +| [organization_domain](variables.tf#L123) | Organization domain. | string | ✓ | | +| [prefix](variables.tf#L128) | Prefix used for resource names. | string | ✓ | | +| [project_config](variables.tf#L137) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | ✓ | | +| [composer_config](variables.tf#L17) | Cloud Composer config. | object({…}) | | {} | +| [data_catalog_tags](variables.tf#L56) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(object({…})) | | {…} | +| [data_force_destroy](variables.tf#L70) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool | | false | +| [enable_services](variables.tf#L76) | Flag to enable or disable services in the Data Platform. | object({…}) | | {} | +| [groups](variables.tf#L85) | User groups. | map(string) | | {…} | +| [location](variables.tf#L95) | Location used for multi-regional resources. | string | | "eu" | +| [network_config](variables.tf#L101) | Shared VPC network configurations to use. If null networks will be created in projects. | object({…}) | | {} | +| [project_suffix](variables.tf#L161) | Suffix used only for project ids. | string | | null | +| [region](variables.tf#L167) | Region used for regional resources. | string | | "europe-west1" | +| [service_encryption_keys](variables.tf#L173) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | {} | ## Outputs diff --git a/blueprints/data-solutions/data-platform-minimal/demo/README.md b/blueprints/data-solutions/data-platform-minimal/demo/README.md index b9a24b82..f3c1cbf7 100644 --- a/blueprints/data-solutions/data-platform-minimal/demo/README.md +++ b/blueprints/data-solutions/data-platform-minimal/demo/README.md @@ -54,5 +54,5 @@ source ./env.sh gsutil -i $LND_SA cp demo/data/*.csv gs://$LND_GCS gsutil -i $CMP_SA cp demo/data/*.j* gs://$PRC_GCS gsutil -i $CMP_SA cp demo/pyspark_* gs://$PRC_GCS -gsutil -i $CMP_SA cp demo/dag_*.py $CMP_GCS +gsutil -i $CMP_SA cp demo/dag_*.py gs://$CMP_GCS/dags ``` diff --git a/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py b/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py index 1f89fd80..321071b2 100644 --- a/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py +++ b/blueprints/data-solutions/data-platform-minimal/demo/dag_bq_gcs2bq.py @@ -92,7 +92,7 @@ with models.DAG( schema_update_options=['ALLOW_FIELD_RELAXATION', 'ALLOW_FIELD_ADDITION'], schema_object="customers.json", schema_object_bucket=PROCESSING_GCS[5:], - project_id=PROCESSING_PRJ, # The process will continue to run on the dataset project until the Apache Airflow bug is fixed. https://github.com/apache/airflow/issues/32106 + project_id=PROCESSING_PRJ, impersonation_chain=[PROCESSING_SA] ) diff --git a/blueprints/data-solutions/data-platform-minimal/demo/dag_dataproc_gcs2bq.py b/blueprints/data-solutions/data-platform-minimal/demo/dag_dataproc_gcs2bq.py index 8b1d30e7..3a3dab52 100644 --- a/blueprints/data-solutions/data-platform-minimal/demo/dag_dataproc_gcs2bq.py +++ b/blueprints/data-solutions/data-platform-minimal/demo/dag_dataproc_gcs2bq.py @@ -34,7 +34,6 @@ CURATED_GCS = Variable.get("CURATED_GCS") CURATED_PRJ = Variable.get("CURATED_PRJ") DP_KMS_KEY = Variable.get("DP_KMS_KEY", "") DP_REGION = Variable.get("DP_REGION") -GCP_REGION = Variable.get("GCP_REGION") LAND_PRJ = Variable.get("LAND_PRJ") LAND_BQ_DATASET = Variable.get("LAND_BQ_DATASET") LAND_GCS = Variable.get("LAND_GCS") diff --git a/blueprints/data-solutions/data-platform-minimal/demo/dag_orchestrate_pyspark.py b/blueprints/data-solutions/data-platform-minimal/demo/dag_orchestrate_pyspark.py index b96f27e4..4258e7e4 100644 --- a/blueprints/data-solutions/data-platform-minimal/demo/dag_orchestrate_pyspark.py +++ b/blueprints/data-solutions/data-platform-minimal/demo/dag_orchestrate_pyspark.py @@ -33,7 +33,6 @@ CURATED_GCS = Variable.get("CURATED_GCS") CURATED_PRJ = Variable.get("CURATED_PRJ") DP_KMS_KEY = Variable.get("DP_KMS_KEY", "") DP_REGION = Variable.get("DP_REGION") -GCP_REGION = Variable.get("GCP_REGION") LAND_PRJ = Variable.get("LAND_PRJ") LAND_BQ_DATASET = Variable.get("LAND_BQ_DATASET") LAND_GCS = Variable.get("LAND_GCS") diff --git a/blueprints/data-solutions/data-platform-minimal/variables.tf b/blueprints/data-solutions/data-platform-minimal/variables.tf index 0dc29003..0bd1deed 100644 --- a/blueprints/data-solutions/data-platform-minimal/variables.tf +++ b/blueprints/data-solutions/data-platform-minimal/variables.tf @@ -19,10 +19,11 @@ variable "composer_config" { type = object({ environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL") software_config = optional(object({ - airflow_config_overrides = optional(map(string), {}) - pypi_packages = optional(map(string), {}) - env_variables = optional(map(string), {}) - image_version = optional(string, "composer-2-airflow-2") + airflow_config_overrides = optional(map(string), {}) + pypi_packages = optional(map(string), {}) + env_variables = optional(map(string), {}) + image_version = optional(string, "composer-2-airflow-2") + cloud_data_lineage_integration = optional(bool, true) }), {}) web_server_access_control = optional(map(string), {}) workloads_config = optional(object({