diff --git a/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf b/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf index 4275c559..3a68a7a8 100644 --- a/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf +++ b/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf @@ -84,7 +84,7 @@ module "processing-dp-historyserver" { staging_bucket = module.processing-staging-0.name temp_bucket = module.processing-temp-0.name gce_cluster_config = { - subnetwork = module.processing-vpc[0].subnets["${var.region}/${var.prefix}-processing"].self_link + subnetwork = local.processing_subnet zone = "${var.region}-b" service_account = module.processing-sa-0.email service_account_scopes = ["cloud-platform"] diff --git a/blueprints/data-solutions/data-platform-minimal/02-processing.tf b/blueprints/data-solutions/data-platform-minimal/02-processing.tf index 53da3fa6..1d8cca2a 100644 --- a/blueprints/data-solutions/data-platform-minimal/02-processing.tf +++ b/blueprints/data-solutions/data-platform-minimal/02-processing.tf @@ -50,12 +50,12 @@ locals { processing_subnet = ( local.use_shared_vpc ? var.network_config.subnet_self_link - : module.processing-vpc.0.subnet_self_links["${var.region}/${var.prefix}-processing"] + : try(module.processing-vpc.0.subnet_self_links["${var.region}/${var.prefix}-processing"], null) ) processing_vpc = ( local.use_shared_vpc ? 
var.network_config.network_self_link - : module.processing-vpc.0.self_link + : try(module.processing-vpc.0.self_link, null) ) } @@ -101,7 +101,7 @@ module "processing-project" { host_project = var.network_config.host_project service_identity_iam = { "roles/compute.networkUser" = [ - "cloudservices", "compute", "container-engine", "dataflow" + "cloudservices", "compute", "container-engine", "dataflow", "dataproc" ] "roles/composer.sharedVpcAgent" = [ "composer" diff --git a/blueprints/data-solutions/data-platform-minimal/README.md b/blueprints/data-solutions/data-platform-minimal/README.md index 5411349b..5760f3f6 100644 --- a/blueprints/data-solutions/data-platform-minimal/README.md +++ b/blueprints/data-solutions/data-platform-minimal/README.md @@ -69,7 +69,7 @@ We use three groups to control access to resources: ### Virtual Private Cloud (VPC) design -As is often the case in real-world configurations, this blueprint accepts as input an existing [Shared-VPC](https://cloud.google.com/vpc/docs/shared-vpc) via the `network_config` variable. Make sure that the GKE API (`container.googleapis.com`) is enabled in the VPC host project. +As is often the case in real-world configurations, this blueprint accepts as input an existing [Shared-VPC](https://cloud.google.com/vpc/docs/shared-vpc) via the `network_config` variable. Make sure that the GKE API (`container.googleapis.com`) is enabled in the VPC host project. Also remember to configure the firewall rules required by the products you are going to use (Composer, Dataflow, or Dataproc). If the `network_config` variable is not provided, one VPC will be created in each project that supports network resources (load, transformation and orchestration). 
diff --git a/modules/dataproc/README.md b/modules/dataproc/README.md index 668f38f5..f848db57 100644 --- a/modules/dataproc/README.md +++ b/modules/dataproc/README.md @@ -145,16 +145,16 @@ module "processing-dp-cluster" { | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| [name](variables.tf#L211) | Cluster name. | string | ✓ | | -| [project_id](variables.tf#L226) | Project ID. | string | ✓ | | -| [region](variables.tf#L231) | Dataproc region. | string | ✓ | | -| [dataproc_config](variables.tf#L17) | Dataproc cluster config. | object({…}) | | {} | -| [group_iam](variables.tf#L184) | Authoritative IAM binding for organization groups, in {GROUP_EMAIL => [ROLES]} format. Group emails need to be static. Can be used in combination with the `iam` variable. | map(list(string)) | | {} | -| [iam](variables.tf#L191) | IAM bindings in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | -| [iam_additive](variables.tf#L198) | IAM additive bindings in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | -| [labels](variables.tf#L205) | The resource labels for instance to use to annotate any related underlying resources, such as Compute Engine VMs. | map(string) | | {} | -| [prefix](variables.tf#L216) | Optional prefix used to generate project id and name. | string | | null | -| [service_account](variables.tf#L236) | Service account to set on the Dataproc cluster. | string | | null | +| [name](variables.tf#L212) | Cluster name. | string | ✓ | | +| [project_id](variables.tf#L227) | Project ID. | string | ✓ | | +| [region](variables.tf#L232) | Dataproc region. | string | ✓ | | +| [dataproc_config](variables.tf#L17) | Dataproc cluster config. | object({…}) | | {} | +| [group_iam](variables.tf#L185) | Authoritative IAM binding for organization groups, in {GROUP_EMAIL => [ROLES]} format. Group emails need to be static. Can be used in combination with the `iam` variable. 
| map(list(string)) | | {} | +| [iam](variables.tf#L192) | IAM bindings in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | +| [iam_additive](variables.tf#L199) | IAM additive bindings in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | +| [labels](variables.tf#L206) | The resource labels for instance to use to annotate any related underlying resources, such as Compute Engine VMs. | map(string) | | {} | +| [prefix](variables.tf#L217) | Optional prefix used to generate project id and name. | string | | null | +| [service_account](variables.tf#L237) | Service account to set on the Dataproc cluster. | string | | null | ## Outputs diff --git a/modules/dataproc/variables.tf b/modules/dataproc/variables.tf index 753a0523..926169b9 100644 --- a/modules/dataproc/variables.tf +++ b/modules/dataproc/variables.tf @@ -49,6 +49,7 @@ variable "dataproc_config" { num_instances = number machine_type = string min_cpu_platform = string + image_uri = string disk_config = optional(object({ boot_disk_type = string boot_disk_size_gb = number