diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/README.md b/examples/data-solutions/gcs-to-bq-with-dataflow/README.md index 58b1456d..701e6b7b 100644 --- a/examples/data-solutions/gcs-to-bq-with-dataflow/README.md +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/README.md @@ -111,19 +111,19 @@ schema_bq_import.json You can check data imported into Google BigQuery from the Google Cloud Console UI. + ## Variables | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| billing_account | Billing account id used as default for new projects. | string | ✓ | | -| project_kms_name | Name for the new KMS Project. | string | ✓ | | -| project_service_name | Name for the new Service Project. | string | ✓ | | -| root_node | The resource name of the parent Folder or Organization. Must be of the form folders/folder_id or organizations/org_id. | string | ✓ | | -| location | The location where resources will be deployed. | string | | "europe" | +| kms_project_id | Name for the new KMS Project. | string | ✓ | | +| service_project_id | Name for the new Service Project. | string | ✓ | | +| billing_account | Billing account id used as default for new projects. | string | | null | +| project_create | Set to true to create projects, will use existing ones by default. | bool | | false | | region | The region where resources will be deployed. | string | | "europe-west1" | -| ssh_source_ranges | IP CIDR ranges that will be allowed to connect via SSH to the onprem instance. | list(string) | | ["0.0.0.0/0"] | +| root_node | The resource name of the parent Folder or Organization. Must be of the form folders/folder_id or organizations/org_id. | string | | null | | vpc_ip_cidr_range | Ip range used in the subnet deployef in the Service Project. | string | | "10.0.0.0/20" | | vpc_name | Name of the VPC created in the Service Project. | string | | "local" | | vpc_subnet_name | Name of the subnet created in the Service Project. | string | | "subnet" | @@ -134,8 +134,10 @@ You can check data imported into Google BigQuery from the Google Cloud Console U |---|---|:---:| | bq_tables | Bigquery Tables. | | | buckets | GCS Bucket Cloud KMS crypto keys. | | +| data_ingestion_command | | | | projects | Project ids. | | | vm | GCE VM. | | + diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf index c9a650b8..43768f41 100644 --- a/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf @@ -26,7 +26,7 @@ locals { module "project-service" { source = "../../../modules/project" - name = var.project_service_name + name = var.service_project_id parent = var.root_node billing_account = var.billing_account project_create = var.project_create @@ -40,12 +40,13 @@ module "project-service" { "servicenetworking.googleapis.com", "storage.googleapis.com", ] - oslogin = true + # TODO(jccb): doesn't work when project_create=false + # oslogin = true } module "project-kms" { source = "../../../modules/project" - name = var.project_kms_name + name = var.kms_project_id parent = var.root_node billing_account = var.billing_account project_create = var.project_create @@ -63,7 +64,7 @@ module "service-account-bq" { project_id = module.project-service.project_id name = "bq-test" iam_project_roles = { - (var.project_service_name) = [ + (var.service_project_id) = [ "roles/logging.logWriter", "roles/monitoring.metricWriter", "roles/bigquery.admin" @@ -76,7 +77,7 @@ module "service-account-gce" { project_id = module.project-service.project_id name = "gce-test" iam_project_roles = { - (var.project_service_name) = [ + (var.service_project_id) = [ "roles/logging.logWriter", "roles/monitoring.metricWriter", "roles/dataflow.admin", @@ -92,7 +93,7 @@ module "service-account-df" { project_id = module.project-service.project_id name = "df-test" iam_project_roles = { - (var.project_service_name) = [ + (var.service_project_id) = [ "roles/dataflow.worker", "roles/bigquery.dataOwner", "roles/bigquery.metadataViewer", @@ -102,14 +103,6 @@ module "service-account-df" { } } -# data "google_bigquery_default_service_account" "bq_sa" { -# project = module.project-service.project_id -# } - -# data "google_storage_project_service_account" "gcs_account" { -# project = module.project-service.project_id -# } - ############################################################################### # KMS # ############################################################################### @@ -119,39 +112,30 @@ module "kms" { project_id = module.project-kms.project_id keyring = { name = "my-keyring", - location = var.location + location = var.region + } + keys = { + key-df = null + key-gce = null + key-gcs = null + key-bq = null } - keys = { key-gce = null, key-gcs = null, key-bq = null } key_iam = { key-gce = { "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ - "serviceAccount:${module.project-service.service_accounts.robots.compute}", + "serviceAccount:${module.project-service.service_accounts.robots.compute}" ] }, key-gcs = { "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ - "serviceAccount:${module.project-service.service_accounts.robots.storage}", - #"serviceAccount:${data.google_storage_project_service_account.gcs_account.email_address}" + "serviceAccount:${module.project-service.service_accounts.robots.storage}" ] }, key-bq = { "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ - "serviceAccount:${module.project-service.service_accounts.robots.bq}", - #"serviceAccount:${data.google_bigquery_default_service_account.bq_sa.email}", + "serviceAccount:${module.project-service.service_accounts.robots.bq}" ] }, - } -} - -module "kms-regional" { - source = "../../../modules/kms" - project_id = module.project-kms.project_id - keyring = { - name = "my-keyring-regional", - location = var.region - } - keys = { key-df = null } - key_iam = { key-df = { "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ "serviceAccount:${module.project-service.service_accounts.robots.dataflow}", @@ -161,6 +145,18 @@ module "kms-regional" { } } +# module "kms-regional" { +# source = "../../../modules/kms" +# project_id = module.project-kms.project_id +# keyring = { +# name = "my-keyring-regional", +# location = var.region +# } +# keys = { key-df = null } +# key_iam = { +# } +# } + ############################################################################### # Networking # ############################################################################### @@ -198,7 +194,7 @@ module "nat" { # GCE # ############################################################################### -module "vm_example" { +module "vm" { source = "../../../modules/compute-vm" project_id = module.project-service.project_id zone = "${var.region}-b" @@ -266,7 +262,9 @@ module "kms-gcs" { project_id = module.project-service.project_id prefix = module.project-service.project_id name = each.key + storage_class = "REGIONAL" iam = each.value.members + location = var.region encryption_key = module.kms.keys.key-gcs.id force_destroy = true } @@ -279,6 +277,7 @@ module "bigquery-dataset" { source = "../../../modules/bigquery-dataset" project_id = module.project-service.project_id id = "bq_dataset" + location = var.region access = { reader-group = { role = "READER", type = "user" } owner = { role = "OWNER", type = "user" } diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf index 0678de67..1ae4a9fe 100644 --- a/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf @@ -25,6 +25,26 @@ output "buckets" { } } +output "data_ingestion_command" { + value = <<-EOF + python data_ingestion.py \ + --runner=DataflowRunner \ + --max_num_workers=10 \ + --autoscaling_algorithm=THROUGHPUT_BASED \ + --region=${var.region} \ + --staging_location=${module.kms-gcs["df-tmplocation"].url} \ + --temp_location=${module.kms-gcs["df-tmplocation"].url}/ \ + --project=${var.service_project_id} \ + --input=${module.kms-gcs["data"].url}/### FILE NAME ###.csv \ + --output=${module.bigquery-dataset.dataset_id}.${module.bigquery-dataset.table_ids.df_import} \ + --service_account_email=${module.service-account-df.email} \ + --network=${var.vpc_name} \ + --subnetwork=${var.vpc_subnet_name} \ + --dataflow_kms_key=${module.kms.key_ids.key-df} \ + --no_use_public_ips + EOF +} + output "projects" { description = "Project ids." value = { @@ -36,7 +56,7 @@ output "projects" { output "vm" { description = "GCE VM." value = { - name = module.vm_example.instance.name - address = module.vm_example.internal_ip + name = module.vm.instance.name + address = module.vm.internal_ip } } diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf index 9f9d5126..a81599a3 100644 --- a/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf @@ -19,24 +19,18 @@ variable "billing_account" { default = null } -variable "location" { - description = "The location where resources will be deployed." - type = string - default = "europe" -} - variable "project_create" { description = "Set to true to create projects, will use existing ones by default." type = bool default = false } -variable "project_kms_name" { +variable "kms_project_id" { description = "Name for the new KMS Project." type = string } -variable "project_service_name" { +variable "service_project_id" { description = "Name for the new Service Project." type = string } @@ -53,11 +47,12 @@ variable "root_node" { default = null } -variable "ssh_source_ranges" { - description = "IP CIDR ranges that will be allowed to connect via SSH to the onprem instance." - type = list(string) - default = ["0.0.0.0/0"] -} +# FIXME(jccb): this is not used +# variable "ssh_source_ranges" { +# description = "IP CIDR ranges that will be allowed to connect via SSH to the onprem instance." +# type = list(string) +# default = ["0.0.0.0/0"] +# } variable "vpc_ip_cidr_range" { description = "Ip range used in the subnet deployef in the Service Project." diff --git a/modules/project/outputs.tf b/modules/project/outputs.tf index 417febe3..8469efc9 100644 --- a/modules/project/outputs.tf +++ b/modules/project/outputs.tf @@ -70,7 +70,9 @@ output "service_accounts" { depends_on = [ google_project_service.project_services, google_kms_crypto_key_iam_member.crypto_key, - google_project_service_identity.jit_si + google_project_service_identity.jit_si, + data.google_bigquery_default_service_account.bq_sa, + data.google_storage_project_service_account.gcs_sa ] }