diff --git a/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf b/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf
index 4275c559..3a68a7a8 100644
--- a/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf
+++ b/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf
@@ -84,7 +84,7 @@ module "processing-dp-historyserver" {
staging_bucket = module.processing-staging-0.name
temp_bucket = module.processing-temp-0.name
gce_cluster_config = {
- subnetwork = module.processing-vpc[0].subnets["${var.region}/${var.prefix}-processing"].self_link
+ subnetwork = local.processing_subnet
zone = "${var.region}-b"
service_account = module.processing-sa-0.email
service_account_scopes = ["cloud-platform"]
diff --git a/blueprints/data-solutions/data-platform-minimal/02-processing.tf b/blueprints/data-solutions/data-platform-minimal/02-processing.tf
index 53da3fa6..1d8cca2a 100644
--- a/blueprints/data-solutions/data-platform-minimal/02-processing.tf
+++ b/blueprints/data-solutions/data-platform-minimal/02-processing.tf
@@ -50,12 +50,12 @@ locals {
processing_subnet = (
local.use_shared_vpc
? var.network_config.subnet_self_link
- : module.processing-vpc.0.subnet_self_links["${var.region}/${var.prefix}-processing"]
+ : try(module.processing-vpc.0.subnet_self_links["${var.region}/${var.prefix}-processing"], null)
)
processing_vpc = (
local.use_shared_vpc
? var.network_config.network_self_link
- : module.processing-vpc.0.self_link
+ : try(module.processing-vpc.0.self_link, null)
)
}
@@ -101,7 +101,7 @@ module "processing-project" {
host_project = var.network_config.host_project
service_identity_iam = {
"roles/compute.networkUser" = [
- "cloudservices", "compute", "container-engine", "dataflow"
+ "cloudservices", "compute", "container-engine", "dataflow", "dataproc"
]
"roles/composer.sharedVpcAgent" = [
"composer"
diff --git a/blueprints/data-solutions/data-platform-minimal/README.md b/blueprints/data-solutions/data-platform-minimal/README.md
index 5411349b..5760f3f6 100644
--- a/blueprints/data-solutions/data-platform-minimal/README.md
+++ b/blueprints/data-solutions/data-platform-minimal/README.md
@@ -69,7 +69,7 @@ We use three groups to control access to resources:
### Virtual Private Cloud (VPC) design
-As is often the case in real-world configurations, this blueprint accepts as input an existing [Shared-VPC](https://cloud.google.com/vpc/docs/shared-vpc) via the `network_config` variable. Make sure that the GKE API (`container.googleapis.com`) is enabled in the VPC host project.
+As is often the case in real-world configurations, this blueprint accepts as input an existing [Shared-VPC](https://cloud.google.com/vpc/docs/shared-vpc) via the `network_config` variable. Make sure that the GKE API (`container.googleapis.com`) is enabled in the VPC host project. Also remember to configure the firewall rules required by the products you plan to use (Composer, Dataflow, or Dataproc).
If the `network_config` variable is not provided, one VPC will be created in each project that supports network resources (load, transformation and orchestration).
diff --git a/modules/dataproc/README.md b/modules/dataproc/README.md
index 668f38f5..f848db57 100644
--- a/modules/dataproc/README.md
+++ b/modules/dataproc/README.md
@@ -145,16 +145,16 @@ module "processing-dp-cluster" {
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
-| [name](variables.tf#L211) | Cluster name. | <code>string</code> | ✓ |  |
-| [project_id](variables.tf#L226) | Project ID. | <code>string</code> | ✓ |  |
-| [region](variables.tf#L231) | Dataproc region. | <code>string</code> | ✓ |  |
-| [dataproc_config](variables.tf#L17) | Dataproc cluster config. | <code>object({…})</code> |  | <code>{}</code> |
-| [group_iam](variables.tf#L184) | Authoritative IAM binding for organization groups, in {GROUP_EMAIL => [ROLES]} format. Group emails need to be static. Can be used in combination with the `iam` variable. | <code>map(list(string))</code> |  | <code>{}</code> |
-| [iam](variables.tf#L191) | IAM bindings in {ROLE => [MEMBERS]} format. | <code>map(list(string))</code> |  | <code>{}</code> |
-| [iam_additive](variables.tf#L198) | IAM additive bindings in {ROLE => [MEMBERS]} format. | <code>map(list(string))</code> |  | <code>{}</code> |
-| [labels](variables.tf#L205) | The resource labels for instance to use to annotate any related underlying resources, such as Compute Engine VMs. | <code>map(string)</code> |  | <code>{}</code> |
-| [prefix](variables.tf#L216) | Optional prefix used to generate project id and name. | <code>string</code> |  | <code>null</code> |
-| [service_account](variables.tf#L236) | Service account to set on the Dataproc cluster. | <code>string</code> |  | <code>null</code> |
+| [name](variables.tf#L212) | Cluster name. | <code>string</code> | ✓ |  |
+| [project_id](variables.tf#L227) | Project ID. | <code>string</code> | ✓ |  |
+| [region](variables.tf#L232) | Dataproc region. | <code>string</code> | ✓ |  |
+| [dataproc_config](variables.tf#L17) | Dataproc cluster config. | <code>object({…})</code> |  | <code>{}</code> |
+| [group_iam](variables.tf#L185) | Authoritative IAM binding for organization groups, in {GROUP_EMAIL => [ROLES]} format. Group emails need to be static. Can be used in combination with the `iam` variable. | <code>map(list(string))</code> |  | <code>{}</code> |
+| [iam](variables.tf#L192) | IAM bindings in {ROLE => [MEMBERS]} format. | <code>map(list(string))</code> |  | <code>{}</code> |
+| [iam_additive](variables.tf#L199) | IAM additive bindings in {ROLE => [MEMBERS]} format. | <code>map(list(string))</code> |  | <code>{}</code> |
+| [labels](variables.tf#L206) | The resource labels for instance to use to annotate any related underlying resources, such as Compute Engine VMs. | <code>map(string)</code> |  | <code>{}</code> |
+| [prefix](variables.tf#L217) | Optional prefix used to generate project id and name. | <code>string</code> |  | <code>null</code> |
+| [service_account](variables.tf#L237) | Service account to set on the Dataproc cluster. | <code>string</code> |  | <code>null</code> |
## Outputs
diff --git a/modules/dataproc/variables.tf b/modules/dataproc/variables.tf
index 753a0523..926169b9 100644
--- a/modules/dataproc/variables.tf
+++ b/modules/dataproc/variables.tf
@@ -49,6 +49,7 @@ variable "dataproc_config" {
num_instances = number
machine_type = string
min_cpu_platform = string
+ image_uri = string
disk_config = optional(object({
boot_disk_type = string
boot_disk_size_gb = number