diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/README.md b/examples/data-solutions/gcs-to-bq-with-dataflow/README.md
index 58b1456d..701e6b7b 100644
--- a/examples/data-solutions/gcs-to-bq-with-dataflow/README.md
+++ b/examples/data-solutions/gcs-to-bq-with-dataflow/README.md
@@ -111,19 +111,19 @@ schema_bq_import.json
You can check data imported into Google BigQuery from the Google Cloud Console UI.
+
## Variables
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
-| billing_account | Billing account id used as default for new projects. | string
| ✓ | |
-| project_kms_name | Name for the new KMS Project. | string
| ✓ | |
-| project_service_name | Name for the new Service Project. | string
| ✓ | |
-| root_node | The resource name of the parent Folder or Organization. Must be of the form folders/folder_id or organizations/org_id. | string
| ✓ | |
-| location | The location where resources will be deployed. | string
| | "europe"
|
+| kms_project_id | Name for the new KMS Project. | string
| ✓ | |
+| service_project_id | Name for the new Service Project. | string
| ✓ | |
+| billing_account | Billing account id used as default for new projects. | string
| | null
|
+| project_create | Set to true to create projects, will use existing ones by default. | bool
| | false
|
| region | The region where resources will be deployed. | string
| | "europe-west1"
|
-| ssh_source_ranges | IP CIDR ranges that will be allowed to connect via SSH to the onprem instance. | list(string)
| | ["0.0.0.0/0"]
|
+| root_node | The resource name of the parent Folder or Organization. Must be of the form folders/folder_id or organizations/org_id. | string
| | null
|
| vpc_ip_cidr_range | Ip range used in the subnet deployef in the Service Project. | string
| | "10.0.0.0/20"
|
| vpc_name | Name of the VPC created in the Service Project. | string
| | "local"
|
| vpc_subnet_name | Name of the subnet created in the Service Project. | string
| | "subnet"
|
@@ -134,8 +134,10 @@ You can check data imported into Google BigQuery from the Google Cloud Console U
|---|---|:---:|
| bq_tables | Bigquery Tables. | |
| buckets | GCS Bucket Cloud KMS crypto keys. | |
+| data_ingestion_command | | |
| projects | Project ids. | |
| vm | GCE VM. | |
+
diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf
index c9a650b8..43768f41 100644
--- a/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf
+++ b/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf
@@ -26,7 +26,7 @@ locals {
module "project-service" {
source = "../../../modules/project"
- name = var.project_service_name
+ name = var.service_project_id
parent = var.root_node
billing_account = var.billing_account
project_create = var.project_create
@@ -40,12 +40,13 @@ module "project-service" {
"servicenetworking.googleapis.com",
"storage.googleapis.com",
]
- oslogin = true
+ # TODO(jccb): enabling oslogin doesn't work when project_create=false
+ # oslogin = true
}
module "project-kms" {
source = "../../../modules/project"
- name = var.project_kms_name
+ name = var.kms_project_id
parent = var.root_node
billing_account = var.billing_account
project_create = var.project_create
@@ -63,7 +64,7 @@ module "service-account-bq" {
project_id = module.project-service.project_id
name = "bq-test"
iam_project_roles = {
- (var.project_service_name) = [
+ (var.service_project_id) = [
"roles/logging.logWriter",
"roles/monitoring.metricWriter",
"roles/bigquery.admin"
@@ -76,7 +77,7 @@ module "service-account-gce" {
project_id = module.project-service.project_id
name = "gce-test"
iam_project_roles = {
- (var.project_service_name) = [
+ (var.service_project_id) = [
"roles/logging.logWriter",
"roles/monitoring.metricWriter",
"roles/dataflow.admin",
@@ -92,7 +93,7 @@ module "service-account-df" {
project_id = module.project-service.project_id
name = "df-test"
iam_project_roles = {
- (var.project_service_name) = [
+ (var.service_project_id) = [
"roles/dataflow.worker",
"roles/bigquery.dataOwner",
"roles/bigquery.metadataViewer",
@@ -102,14 +103,6 @@ module "service-account-df" {
}
}
-# data "google_bigquery_default_service_account" "bq_sa" {
-# project = module.project-service.project_id
-# }
-
-# data "google_storage_project_service_account" "gcs_account" {
-# project = module.project-service.project_id
-# }
-
###############################################################################
# KMS #
###############################################################################
@@ -119,39 +112,30 @@ module "kms" {
project_id = module.project-kms.project_id
keyring = {
name = "my-keyring",
- location = var.location
+ location = var.region
+ }
+ keys = {
+ key-df = null
+ key-gce = null
+ key-gcs = null
+ key-bq = null
}
- keys = { key-gce = null, key-gcs = null, key-bq = null }
key_iam = {
key-gce = {
"roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
- "serviceAccount:${module.project-service.service_accounts.robots.compute}",
+ "serviceAccount:${module.project-service.service_accounts.robots.compute}"
]
},
key-gcs = {
"roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
- "serviceAccount:${module.project-service.service_accounts.robots.storage}",
- #"serviceAccount:${data.google_storage_project_service_account.gcs_account.email_address}"
+ "serviceAccount:${module.project-service.service_accounts.robots.storage}"
]
},
key-bq = {
"roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
- "serviceAccount:${module.project-service.service_accounts.robots.bq}",
- #"serviceAccount:${data.google_bigquery_default_service_account.bq_sa.email}",
+ "serviceAccount:${module.project-service.service_accounts.robots.bq}"
]
},
- }
-}
-
-module "kms-regional" {
- source = "../../../modules/kms"
- project_id = module.project-kms.project_id
- keyring = {
- name = "my-keyring-regional",
- location = var.region
- }
- keys = { key-df = null }
- key_iam = {
key-df = {
"roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
"serviceAccount:${module.project-service.service_accounts.robots.dataflow}",
@@ -161,6 +145,18 @@ module "kms-regional" {
}
}
+# module "kms-regional" {
+# source = "../../../modules/kms"
+# project_id = module.project-kms.project_id
+# keyring = {
+# name = "my-keyring-regional",
+# location = var.region
+# }
+# keys = { key-df = null }
+# key_iam = {
+# }
+# }
+
###############################################################################
# Networking #
###############################################################################
@@ -198,7 +194,7 @@ module "nat" {
# GCE #
###############################################################################
-module "vm_example" {
+module "vm" {
source = "../../../modules/compute-vm"
project_id = module.project-service.project_id
zone = "${var.region}-b"
@@ -266,7 +262,9 @@ module "kms-gcs" {
project_id = module.project-service.project_id
prefix = module.project-service.project_id
name = each.key
+ storage_class = "REGIONAL"
iam = each.value.members
+ location = var.region
encryption_key = module.kms.keys.key-gcs.id
force_destroy = true
}
@@ -279,6 +277,7 @@ module "bigquery-dataset" {
source = "../../../modules/bigquery-dataset"
project_id = module.project-service.project_id
id = "bq_dataset"
+ location = var.region
access = {
reader-group = { role = "READER", type = "user" }
owner = { role = "OWNER", type = "user" }
diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf
index 0678de67..1ae4a9fe 100644
--- a/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf
+++ b/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf
@@ -25,6 +25,30 @@ output "buckets" {
}
}
+# Ready-to-run command that launches the sample Dataflow ingestion job using
+# the resources created by this example. Replace the "### FILE NAME ###"
+# placeholder with the name of the CSV file to import from the data bucket.
+# Dataflow needs the subnetwork as regions/REGION/subnetworks/NAME, not a bare name.
+output "data_ingestion_command" {
+ value = <<-EOF
+ python data_ingestion.py \
+ --runner=DataflowRunner \
+ --max_num_workers=10 \
+ --autoscaling_algorithm=THROUGHPUT_BASED \
+ --region=${var.region} \
+ --staging_location=${module.kms-gcs["df-tmplocation"].url} \
+ --temp_location=${module.kms-gcs["df-tmplocation"].url}/ \
+ --project=${var.service_project_id} \
+ --input=${module.kms-gcs["data"].url}/### FILE NAME ###.csv \
+ --output=${module.bigquery-dataset.dataset_id}.${module.bigquery-dataset.table_ids.df_import} \
+ --service_account_email=${module.service-account-df.email} \
+ --network=${var.vpc_name} \
+ --subnetwork=regions/${var.region}/subnetworks/${var.vpc_subnet_name} \
+ --dataflow_kms_key=${module.kms.key_ids.key-df} \
+ --no_use_public_ips
+ EOF
+}
+
output "projects" {
description = "Project ids."
value = {
@@ -36,7 +56,7 @@ output "projects" {
output "vm" {
description = "GCE VM."
value = {
- name = module.vm_example.instance.name
- address = module.vm_example.internal_ip
+ name = module.vm.instance.name
+ address = module.vm.internal_ip
}
}
diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf
index 9f9d5126..a81599a3 100644
--- a/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf
+++ b/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf
@@ -19,24 +19,18 @@ variable "billing_account" {
default = null
}
-variable "location" {
- description = "The location where resources will be deployed."
- type = string
- default = "europe"
-}
-
variable "project_create" {
description = "Set to true to create projects, will use existing ones by default."
type = bool
default = false
}
-variable "project_kms_name" {
+variable "kms_project_id" {
description = "Name for the new KMS Project."
type = string
}
-variable "project_service_name" {
+variable "service_project_id" {
description = "Name for the new Service Project."
type = string
}
@@ -53,11 +47,12 @@ variable "root_node" {
default = null
}
-variable "ssh_source_ranges" {
- description = "IP CIDR ranges that will be allowed to connect via SSH to the onprem instance."
- type = list(string)
- default = ["0.0.0.0/0"]
-}
+# FIXME(jccb): the ssh_source_ranges variable is not used
+# variable "ssh_source_ranges" {
+# description = "IP CIDR ranges that will be allowed to connect via SSH to the onprem instance."
+# type = list(string)
+# default = ["0.0.0.0/0"]
+# }
variable "vpc_ip_cidr_range" {
description = "Ip range used in the subnet deployef in the Service Project."
diff --git a/modules/project/outputs.tf b/modules/project/outputs.tf
index 417febe3..8469efc9 100644
--- a/modules/project/outputs.tf
+++ b/modules/project/outputs.tf
@@ -70,7 +70,9 @@ output "service_accounts" {
depends_on = [
google_project_service.project_services,
google_kms_crypto_key_iam_member.crypto_key,
- google_project_service_identity.jit_si
+ google_project_service_identity.jit_si,
+ data.google_bigquery_default_service_account.bq_sa,
+ data.google_storage_project_service_account.gcs_sa
]
}