Merge pull request #429 from GoogleCloudPlatform/ludo-fix-example-project-creation

Make project creation optional in gcs to bq example
This commit is contained in:
Julio Castillo 2022-01-14 17:05:42 +01:00 committed by GitHub
commit d8bed9cba1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 384 additions and 354 deletions

View File

@ -111,22 +111,19 @@ schema_bq_import.json
You can check data imported into Google BigQuery from the Google Cloud Console UI.
<!-- BEGIN TFDOC -->
## Variables
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| billing_account | Billing account id used as default for new projects. | <code>string</code> | ✓ | |
| project_kms_name | Name for the new KMS Project. | <code>string</code> | ✓ | |
| project_service_name | Name for the new Service Project. | <code>string</code> | ✓ | |
| root_node | The resource name of the parent Folder or Organization. Must be of the form folders/folder_id or organizations/org_id. | <code>string</code> | ✓ | |
| location | The location where resources will be deployed. | <code>string</code> | | <code>&#34;europe&#34;</code> |
| prefix | Unique prefix used for resource names. Not used for project if 'project_create' is null. | <code>string</code> | ✓ | |
| project_id | Project id, references existing project if `project_create` is null. | <code>string</code> | ✓ | |
| project_create | Provide values if project creation is needed, uses existing project if null. Parent is in 'folders/nnn' or 'organizations/nnn' format | <code title="object&#40;&#123;&#10; billing_account_id &#61; string&#10; parent &#61; string&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>null</code> |
| region | The region where resources will be deployed. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
| ssh_source_ranges | IP CIDR ranges that will be allowed to connect via SSH to the onprem instance. | <code>list&#40;string&#41;</code> | | <code>&#91;&#34;0.0.0.0&#47;0&#34;&#93;</code> |
| vpc_ip_cidr_range | Ip range used in the subnet deployef in the Service Project. | <code>string</code> | | <code>&#34;10.0.0.0&#47;20&#34;</code> |
| vpc_name | Name of the VPC created in the Service Project. | <code>string</code> | | <code>&#34;local&#34;</code> |
| vpc_subnet_name | Name of the subnet created in the Service Project. | <code>string</code> | | <code>&#34;subnet&#34;</code> |
| vpc_subnet_range | Ip range used for the VPC subnet created for the example. | <code>string</code> | | <code>&#34;10.0.0.0&#47;20&#34;</code> |
## Outputs
@ -134,8 +131,11 @@ You can check data imported into Google BigQuery from the Google Cloud Console U
|---|---|:---:|
| bq_tables | Bigquery Tables. | |
| buckets | GCS Bucket Cloud KMS crypto keys. | |
| projects | Project ids. | |
| data_ingestion_command | | |
| project_id | Project id. | |
| vm | GCE VM. | |
<!-- END TFDOC -->

View File

@ -0,0 +1,65 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# BigQuery dataset for the example, holding one table per ingestion path
# (bq_import and df_import). Dataset and tables are encrypted with the
# "key-bq" key exposed by the kms module.
module "bigquery-dataset" {
  source     = "../../../modules/bigquery-dataset"
  project_id = module.project.project_id
  id         = "example_dataset"
  location   = var.region
  # both access entries below are bound to the same service account email
  access = {
    reader-group = { role = "READER", type = "user" }
    owner        = { role = "OWNER", type = "user" }
  }
  access_identities = {
    reader-group = module.service-account-bq.email
    owner        = module.service-account-bq.email
  }
  encryption_key = module.kms.keys.key-bq.id
  tables = {
    bq_import = {
      friendly_name = "BQ import"
      labels        = {}
      partitioning = {
        field = null
        range = null # use start/end/interval for range
        time  = null
      }
      schema = file("${path.module}/schema_bq_import.json")
      # "options" is declared exactly once per table: a repeated key in the
      # same object literal is shadowed by the later entry
      options = {
        clustering      = null
        expiration_time = null
        encryption_key  = module.kms.keys.key-bq.id
      }
      deletion_protection = false
    },
    df_import = {
      friendly_name = "Dataflow import"
      labels        = {}
      partitioning = {
        field = null
        range = null # use start/end/interval for range
        time  = null
      }
      schema = file("${path.module}/schema_df_import.json")
      # "options" is declared exactly once per table: a repeated key in the
      # same object literal is shadowed by the later entry
      options = {
        clustering      = null
        expiration_time = null
        encryption_key  = module.kms.keys.key-bq.id
      }
      deletion_protection = false
    }
  }
}

View File

@ -0,0 +1,54 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
locals {
  # Startup script passed to the VM as metadata: three newline-separated
  # lines with no trailing newline (chomp drops the one the heredoc adds).
  vm-startup-script = chomp(<<-EOT
    #! /bin/bash
    apt-get update && apt-get install -y bash-completion git python3-venv gcc build-essential python-dev python3-dev
    pip3 install --upgrade setuptools pip
  EOT
  )
}
# Example GCE instance placed in zone "b" of the configured region and
# attached to the example subnet. The boot disk is encrypted with the
# "key-gce" KMS key and one additional 10 GB "data" disk is attached.
module "vm" {
  source     = "../../../modules/compute-vm"
  project_id = module.project.project_id
  name       = "${var.prefix}-vm-0"
  zone       = "${var.region}-b"
  tags       = ["ssh"]

  # identity and API scopes the instance runs with
  service_account        = module.service-account-gce.email
  service_account_scopes = ["https://www.googleapis.com/auth/cloud-platform"]

  network_interfaces = [
    {
      network    = module.vpc.self_link
      subnetwork = local.subnet_self_link
      nat        = false
      addresses  = null
    }
  ]

  boot_disk = {
    image        = "projects/debian-cloud/global/images/family/debian-10"
    type         = "pd-ssd"
    size         = 10
    encrypt_disk = true
  }

  attached_disks = [
    {
      name        = "data"
      size        = 10
      source      = null
      source_type = null
      options     = null
    }
  ]

  encryption = {
    encrypt_boot            = true
    disk_encryption_key_raw = null
    kms_key_self_link       = module.kms.key_ids.key-gce
  }

  # startup script is assembled in the locals block of this file
  metadata = {
    startup-script = local.vm-startup-script
  }
}

View File

@ -0,0 +1,49 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Regional bucket "data", encrypted with the "key-gcs" KMS key.
# The GCE service account gets storage admin on it, the Dataflow service
# account gets read-only object access.
module "gcs-data" {
  source         = "../../../modules/gcs"
  project_id     = module.project.project_id
  name           = "data"
  prefix         = var.prefix
  location       = var.region
  storage_class  = "REGIONAL"
  encryption_key = module.kms.keys.key-gcs.id
  # bucket contents are removed together with the bucket on destroy
  force_destroy = true
  iam = {
    "roles/storage.objectViewer" = [
      "serviceAccount:${module.service-account-df.email}"
    ]
    "roles/storage.admin" = [
      "serviceAccount:${module.service-account-gce.email}"
    ]
  }
}
# Regional bucket "df-tmp", encrypted with the "key-gcs" KMS key.
# Both the GCE and the Dataflow service accounts get storage admin on it.
module "gcs-df-tmp" {
  source         = "../../../modules/gcs"
  project_id     = module.project.project_id
  name           = "df-tmp"
  prefix         = var.prefix
  location       = var.region
  storage_class  = "REGIONAL"
  encryption_key = module.kms.keys.key-gcs.id
  # bucket contents are removed together with the bucket on destroy
  force_destroy = true
  iam = {
    "roles/storage.admin" = [
      "serviceAccount:${module.service-account-gce.email}",
      "serviceAccount:${module.service-account-df.email}"
    ]
  }
}

View File

@ -0,0 +1,60 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Service account "bq-test": BigQuery admin plus log and metric writer
# roles, all granted on the example project.
module "service-account-bq" {
  source     = "../../../modules/iam-service-account"
  project_id = module.project.project_id
  prefix     = var.prefix
  name       = "bq-test"

  iam_project_roles = {
    (module.project.project_id) = [
      "roles/bigquery.admin",
      "roles/logging.logWriter",
      "roles/monitoring.metricWriter"
    ]
  }
}
# Service account "df-test": Dataflow worker, BigQuery data/job roles and
# read-only storage object access, all granted on the example project.
module "service-account-df" {
  source     = "../../../modules/iam-service-account"
  project_id = module.project.project_id
  prefix     = var.prefix
  name       = "df-test"

  iam_project_roles = {
    (module.project.project_id) = [
      "roles/bigquery.dataOwner",
      "roles/bigquery.jobUser",
      "roles/bigquery.metadataViewer",
      "roles/dataflow.worker",
      "roles/storage.objectViewer"
    ]
  }
}
# Service account "gce-test", used by the example VM: Dataflow admin,
# service account user, BigQuery data/job roles and log/metric writer
# roles, all granted on the example project.
module "service-account-gce" {
  source     = "../../../modules/iam-service-account"
  project_id = module.project.project_id
  prefix     = var.prefix
  name       = "gce-test"

  iam_project_roles = {
    (module.project.project_id) = [
      "roles/bigquery.dataOwner",
      "roles/bigquery.jobUser",
      "roles/dataflow.admin",
      "roles/iam.serviceAccountUser",
      "roles/logging.logWriter",
      "roles/monitoring.metricWriter"
    ]
  }
}

View File

@ -0,0 +1,63 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Single keyring in the configured region with one key per consumer
# (BigQuery, Dataflow, GCE, GCS). Each key grants encrypt/decrypt to the
# matching service robot account(s) of the example project.
module "kms" {
  source     = "../../../modules/kms"
  project_id = module.project.project_id

  keyring = {
    location = var.region
    name     = "${var.prefix}-keyring"
  }

  # null values: no per-key settings are supplied here
  keys = {
    key-bq  = null
    key-df  = null
    key-gce = null
    key-gcs = null
  }

  key_iam = {
    key-bq = {
      "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
        "serviceAccount:${module.project.service_accounts.robots.bq}"
      ]
    }
    # the Dataflow key is usable by both the dataflow and compute robots
    key-df = {
      "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
        "serviceAccount:${module.project.service_accounts.robots.dataflow}",
        "serviceAccount:${module.project.service_accounts.robots.compute}"
      ]
    }
    key-gce = {
      "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
        "serviceAccount:${module.project.service_accounts.robots.compute}"
      ]
    }
    key-gcs = {
      "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
        "serviceAccount:${module.project.service_accounts.robots.storage}"
      ]
    }
  }
}
# module "kms-regional" {
# source = "../../../modules/kms"
# project_id = module.project-kms.project_id
# keyring = {
# name = "my-keyring-regional",
# location = var.region
# }
# keys = { key-df = null }
# key_iam = {
# }
# }

View File

@ -13,22 +13,17 @@
# limitations under the License.
locals {
vm-startup-script = join("\n", [
"#! /bin/bash",
"apt-get update && apt-get install -y bash-completion git python3-venv gcc build-essential python-dev python3-dev",
"pip3 install --upgrade setuptools pip"
])
subnet_name = module.vpc.subnets["${var.region}/${var.prefix}-subnet-0"].name
subnet_self_link = module.vpc.subnets["${var.region}/${var.prefix}-subnet-0"].self_link
}
###############################################################################
# Projects - Centralized #
###############################################################################
module "project-service" {
module "project" {
source = "../../../modules/project"
name = var.project_service_name
parent = var.root_node
billing_account = var.billing_account
name = var.project_id
parent = try(var.project_create.parent, null)
billing_account = try(var.project_create.billing_account_id, null)
project_create = var.project_create != null
prefix = var.project_create == null ? null : var.prefix
services = [
"bigquery.googleapis.com",
"bigqueryreservation.googleapis.com",
@ -39,138 +34,19 @@ module "project-service" {
"servicenetworking.googleapis.com",
"storage.googleapis.com",
]
oslogin = true
}
module "project-kms" {
source = "../../../modules/project"
name = var.project_kms_name
parent = var.root_node
billing_account = var.billing_account
services = [
"cloudkms.googleapis.com",
]
}
###############################################################################
# Project Service Accounts #
###############################################################################
module "service-account-bq" {
source = "../../../modules/iam-service-account"
project_id = module.project-service.project_id
name = "bq-test"
iam_project_roles = {
(var.project_service_name) = [
"roles/logging.logWriter",
"roles/monitoring.metricWriter",
"roles/bigquery.admin"
]
service_config = {
disable_on_destroy = false, disable_dependent_services = false
}
}
module "service-account-gce" {
source = "../../../modules/iam-service-account"
project_id = module.project-service.project_id
name = "gce-test"
iam_project_roles = {
(var.project_service_name) = [
"roles/logging.logWriter",
"roles/monitoring.metricWriter",
"roles/dataflow.admin",
"roles/iam.serviceAccountUser",
"roles/bigquery.dataOwner",
"roles/bigquery.jobUser" # Needed to import data using 'bq' command
]
}
}
module "service-account-df" {
source = "../../../modules/iam-service-account"
project_id = module.project-service.project_id
name = "df-test"
iam_project_roles = {
(var.project_service_name) = [
"roles/dataflow.worker",
"roles/bigquery.dataOwner",
"roles/bigquery.metadataViewer",
"roles/storage.objectViewer",
"roles/bigquery.jobUser"
]
}
}
# data "google_bigquery_default_service_account" "bq_sa" {
# project = module.project-service.project_id
# }
# data "google_storage_project_service_account" "gcs_account" {
# project = module.project-service.project_id
# }
###############################################################################
# KMS #
###############################################################################
module "kms" {
source = "../../../modules/kms"
project_id = module.project-kms.project_id
keyring = {
name = "my-keyring",
location = var.location
}
keys = { key-gce = null, key-gcs = null, key-bq = null }
key_iam = {
key-gce = {
"roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
"serviceAccount:${module.project-service.service_accounts.robots.compute}",
]
},
key-gcs = {
"roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
"serviceAccount:${module.project-service.service_accounts.robots.storage}",
#"serviceAccount:${data.google_storage_project_service_account.gcs_account.email_address}"
]
},
key-bq = {
"roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
"serviceAccount:${module.project-service.service_accounts.robots.bq}",
#"serviceAccount:${data.google_bigquery_default_service_account.bq_sa.email}",
]
},
}
}
module "kms-regional" {
source = "../../../modules/kms"
project_id = module.project-kms.project_id
keyring = {
name = "my-keyring-regional",
location = var.region
}
keys = { key-df = null }
key_iam = {
key-df = {
"roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
"serviceAccount:${module.project-service.service_accounts.robots.dataflow}",
"serviceAccount:${module.project-service.service_accounts.robots.compute}",
]
}
}
}
###############################################################################
# Networking #
###############################################################################
module "vpc" {
source = "../../../modules/net-vpc"
project_id = module.project-service.project_id
name = var.vpc_name
project_id = module.project.project_id
name = "${var.prefix}-vpc"
subnets = [
{
ip_cidr_range = var.vpc_ip_cidr_range
name = var.vpc_subnet_name
ip_cidr_range = var.vpc_subnet_range
name = "${var.prefix}-subnet-0"
region = var.region
secondary_ip_range = {}
}
@ -179,147 +55,15 @@ module "vpc" {
module "vpc-firewall" {
source = "../../../modules/net-vpc-firewall"
project_id = module.project-service.project_id
project_id = module.project.project_id
network = module.vpc.name
admin_ranges = [var.vpc_ip_cidr_range]
admin_ranges = [var.vpc_subnet_range]
}
module "nat" {
source = "../../../modules/net-cloudnat"
project_id = module.project-service.project_id
project_id = module.project.project_id
region = var.region
name = "default"
name = "${var.prefix}-default"
router_network = module.vpc.name
}
###############################################################################
# GCE #
###############################################################################
module "vm_example" {
source = "../../../modules/compute-vm"
project_id = module.project-service.project_id
zone = "${var.region}-b"
name = "vm-example"
network_interfaces = [{
network = module.vpc.self_link,
subnetwork = module.vpc.subnet_self_links["${var.region}/${var.vpc_subnet_name}"],
nat = false,
addresses = null
}]
attached_disks = [
{
name = "data"
size = 10
source = null
source_type = null
options = null
}
]
boot_disk = {
image = "projects/debian-cloud/global/images/family/debian-10"
type = "pd-ssd"
size = 10
encrypt_disk = true
}
encryption = {
encrypt_boot = true
disk_encryption_key_raw = null
kms_key_self_link = module.kms.key_ids.key-gce
}
metadata = {
startup-script = local.vm-startup-script
}
service_account = module.service-account-gce.email
service_account_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
tags = ["ssh"]
}
###############################################################################
# GCS #
###############################################################################
module "kms-gcs" {
source = "../../../modules/gcs"
for_each = {
data = {
members = {
"roles/storage.admin" = [
"serviceAccount:${module.service-account-gce.email}",
],
"roles/storage.objectViewer" = [
"serviceAccount:${module.service-account-df.email}",
]
}
}
df-tmplocation = {
members = {
"roles/storage.admin" = [
"serviceAccount:${module.service-account-gce.email}",
"serviceAccount:${module.service-account-df.email}",
]
}
}
}
project_id = module.project-service.project_id
prefix = module.project-service.project_id
name = each.key
iam = each.value.members
encryption_key = module.kms.keys.key-gcs.id
force_destroy = true
}
###############################################################################
# BQ #
###############################################################################
module "bigquery-dataset" {
source = "../../../modules/bigquery-dataset"
project_id = module.project-service.project_id
id = "bq_dataset"
access = {
reader-group = { role = "READER", type = "user" }
owner = { role = "OWNER", type = "user" }
}
access_identities = {
reader-group = module.service-account-bq.email
owner = module.service-account-bq.email
}
encryption_key = module.kms.keys.key-bq.id
tables = {
bq_import = {
friendly_name = "BQ import"
labels = {}
options = null
partitioning = {
field = null
range = null # use start/end/interval for range
time = null
}
schema = file("${path.module}/schema_bq_import.json")
options = {
clustering = null
expiration_time = null
encryption_key = module.kms.keys.key-bq.id
}
deletion_protection = true
},
df_import = {
friendly_name = "Dataflow import"
labels = {}
options = null
partitioning = {
field = null
range = null # use start/end/interval for range
time = null
}
schema = file("${path.module}/schema_df_import.json")
options = {
clustering = null
expiration_time = null
encryption_key = module.kms.keys.key-bq.id
}
deletion_protection = true
}
}
}

View File

@ -20,23 +20,40 @@ output "bq_tables" {
output "buckets" {
description = "GCS Bucket Cloud KMS crypto keys."
value = {
for name, bucket in module.kms-gcs :
bucket.name => bucket.url
data = module.gcs-data.name
df-tmp = module.gcs-df-tmp.name
}
}
output "projects" {
description = "Project ids."
value = {
service-project = module.project-service.project_id
kms-project = module.project-kms.project_id
}
# Command line that launches the Dataflow ingestion job against the resources
# created by this example. The "### FILE NAME ###" placeholder in --input has
# to be replaced by hand before running it.
output "data_ingestion_command" {
  value = <<-EOF
    python data_ingestion.py \
    --runner=DataflowRunner \
    --max_num_workers=10 \
    --autoscaling_algorithm=THROUGHPUT_BASED \
    --region=${var.region} \
    --staging_location=${module.gcs-df-tmp.url} \
    --temp_location=${module.gcs-df-tmp.url}/ \
    --project=${module.project.project_id} \
    --input=${module.gcs-data.url}/### FILE NAME ###.csv \
    --output=${module.bigquery-dataset.dataset_id}.${module.bigquery-dataset.table_ids.df_import} \
    --service_account_email=${module.service-account-df.email} \
    --network=${module.vpc.name} \
    --subnetwork=${local.subnet_name} \
    --dataflow_kms_key=${module.kms.key_ids.key-df} \
    --no_use_public_ips
  EOF
  # --project uses the id exported by the project module rather than
  # var.project_id: the module also receives a prefix when the project is
  # created, so the raw variable may not match the real project id
}
# Id of the project used by the example, as exported by the project module.
output "project_id" {
  value       = module.project.project_id
  description = "Project id."
}
output "vm" {
description = "GCE VM."
value = {
name = module.vm_example.instance.name
address = module.vm_example.internal_ip
name = module.vm.instance.name
address = module.vm.internal_ip
}
}

View File

@ -13,24 +13,23 @@
# limitations under the License.
variable "billing_account" {
description = "Billing account id used as default for new projects."
variable "prefix" {
description = "Unique prefix used for resource names. Not used for project if 'project_create' is null."
type = string
default = null
}
variable "location" {
description = "The location where resources will be deployed."
type = string
default = "europe"
variable "project_create" {
description = "Provide values if project creation is needed, uses existing project if null. Parent is in 'folders/nnn' or 'organizations/nnn' format"
type = object({
billing_account_id = string
parent = string
})
default = null
}
variable "project_kms_name" {
description = "Name for the new KMS Project."
type = string
}
variable "project_service_name" {
description = "Name for the new Service Project."
variable "project_id" {
description = "Project id, references existing project if `project_create` is null."
type = string
}
@ -40,31 +39,8 @@ variable "region" {
default = "europe-west1"
}
variable "root_node" {
description = "The resource name of the parent Folder or Organization. Must be of the form folders/folder_id or organizations/org_id."
type = string
}
variable "ssh_source_ranges" {
description = "IP CIDR ranges that will be allowed to connect via SSH to the onprem instance."
type = list(string)
default = ["0.0.0.0/0"]
}
variable "vpc_ip_cidr_range" {
description = "Ip range used in the subnet deployef in the Service Project."
variable "vpc_subnet_range" {
description = "Ip range used for the VPC subnet created for the example."
type = string
default = "10.0.0.0/20"
}
variable "vpc_name" {
description = "Name of the VPC created in the Service Project."
type = string
default = "local"
}
variable "vpc_subnet_name" {
description = "Name of the subnet created in the Service Project."
type = string
default = "subnet"
}

View File

@ -191,7 +191,6 @@ resource "google_bigquery_table" "default" {
type = each.value.partitioning.time.type
}
}
}
resource "google_bigquery_table" "views" {

View File

@ -70,7 +70,9 @@ output "service_accounts" {
depends_on = [
google_project_service.project_services,
google_kms_crypto_key_iam_member.crypto_key,
google_project_service_identity.jit_si
google_project_service_identity.jit_si,
data.google_bigquery_default_service_account.bq_sa,
data.google_storage_project_service_account.gcs_sa
]
}

View File

@ -15,9 +15,11 @@
*/
module "test" {
source = "../../../../../examples/data-solutions/gcs-to-bq-with-dataflow/"
billing_account = var.billing_account
project_kms_name = var.project_kms_name
project_service_name = var.project_service_name
root_node = var.root_node
source = "../../../../../examples/data-solutions/gcs-to-bq-with-dataflow/"
prefix = var.prefix
project_id = var.project_id
project_create = {
billing_account_id = var.billing_account_id
parent = var.parent
}
}

View File

@ -14,23 +14,22 @@
* limitations under the License.
*/
variable "billing_account" {
type = string
default = "123456-123456-123456"
variable "billing_account_id" {
default = "012345-678901-234567"
}
variable "root_node" {
description = "The resource name of the parent Folder or Organization. Must be of the form folders/folder_id or organizations/org_id."
type = string
default = "folders/12345678"
variable "parent" {
default = "folders/01234567890"
}
variable "project_service_name" {
type = string
default = "project-srv"
variable "prefix" {
default = "fabric"
}
variable "project_kms_name" {
type = string
default = "project-kms"
variable "project_id" {
default = "gcs-to-bq"
}
variable "region" {
default = "europe-west1"
}

View File

@ -15,5 +15,5 @@
def test_resources(e2e_plan_runner):
"Test that plan works and the numbers of resources is as expected."
modules, resources = e2e_plan_runner()
assert len(modules) == 14
assert len(resources) == 61
assert len(modules) == 12
assert len(resources) == 57