This commit is contained in:
Lorenzo Caggioni 2022-01-14 16:59:10 +01:00
parent 823282af7c
commit 1ed4491f23
6 changed files with 81 additions and 26 deletions

View File

@ -7,6 +7,7 @@ The solution will use:
- Cloud NAT to let resources egress to the Internet, to run system updates and install packages
- rely on [Service Account Impersonation](https://cloud.google.com/iam/docs/impersonating-service-accounts) to avoid the use of service account keys
- Service Accounts with least privilege on each resource
- (Optional) CMEK encryption for GCS buckets, Dataflow instances and BigQuery tables
The example is designed to match real-world use cases with a minimum amount of resources and some compromises listed below. It can be used as a starting point for more complex scenarios.
@ -19,7 +20,6 @@ In the example we implemented some compromise to keep the example minimal and ea
- Use only Identity Groups to assign roles
- Use Authoritative IAM role assignment
- Split resources across different projects: Data Landing, Data Transformation, Data Lake, ...
- CMEK adoption to encrypt resources
- Use VPC-SC to mitigate data exfiltration
## Managed resources and services

View File

@ -17,25 +17,25 @@
###############################################################################
# Cloud Storage bucket "data", created through the shared gcs module in the
# example project and in var.region.
module "gcs-data" {
  source        = "../../../modules/gcs"
  project_id    = module.project.project_id
  prefix        = var.prefix
  name          = "data"
  location      = var.region
  storage_class = "REGIONAL"
  # CMEK is opt-in: module.kms is declared with count, so it only has an
  # instance [0] when var.cmek_encryption is true. When the flag is false, or
  # if the key lookup fails (try falls back to null), no key is passed.
  encryption_key = var.cmek_encryption ? try(module.kms[0].keys.key-gcs.id, null) : null
  force_destroy  = true
}
# Cloud Storage bucket "df-tmp", created through the shared gcs module in the
# example project and in var.region. Its URL is what the generated Dataflow
# command passes as the staging location.
module "gcs-df-tmp" {
  source        = "../../../modules/gcs"
  project_id    = module.project.project_id
  prefix        = var.prefix
  name          = "df-tmp"
  location      = var.region
  storage_class = "REGIONAL"
  # CMEK is opt-in: module.kms is declared with count, so it only has an
  # instance [0] when var.cmek_encryption is true. When the flag is false, or
  # if the key lookup fails (try falls back to null), no key is passed.
  encryption_key = var.cmek_encryption ? try(module.kms[0].keys.key-gcs.id, null) : null
  force_destroy  = true
}
###############################################################################
@ -46,6 +46,7 @@ module "bigquery-dataset" {
source = "../../../modules/bigquery-dataset"
project_id = module.project.project_id
id = "datalake"
location = var.region
# Define Tables in Terraform for the purpose of the example.
# Probably in a production environment you would handle Tables creation in a
# separate Terraform State or using a different tool/pipeline (for example: Dataform).
@ -63,4 +64,5 @@ module "bigquery-dataset" {
deletion_protection = false
}
}
encryption_key = var.cmek_encryption ? try(module.kms[0].keys.key-bq.id, null) : null
}

View File

@ -0,0 +1,46 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Optional Cloud KMS keyring and keys. Instantiated only when
# var.cmek_encryption is true; because of count, consumers must address it as
# module.kms[0].
module "kms" {
  source     = "../../../modules/kms"
  count      = var.cmek_encryption ? 1 : 0
  project_id = module.project.project_id

  keyring = {
    location = var.region
    name     = "${var.prefix}-keyring"
  }

  # One key per consumer: BigQuery (key-bq), Dataflow (key-df) and Cloud
  # Storage (key-gcs). null passes no per-key options to the module.
  keys = {
    key-bq  = null
    key-df  = null
    key-gcs = null
  }

  # Grant encrypt/decrypt on each key to the service agents that use it.
  # The inner map lists, per key, which entries of the project's "robots"
  # service account map receive the role.
  key_iam = {
    for key_name, agents in {
      key-bq  = ["bq"]
      key-df  = ["dataflow", "compute"]
      key-gcs = ["storage"]
    } :
    key_name => {
      "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [
        for agent in agents :
        "serviceAccount:${module.project.service_accounts.robots[agent]}"
      ]
    }
  }
}

View File

@ -24,16 +24,18 @@ module "project" {
project_create = var.project_create != null
prefix = var.project_create == null ? null : var.prefix
services = [
"compute.googleapis.com",
"servicenetworking.googleapis.com",
"storage-component.googleapis.com",
"bigquery.googleapis.com",
"bigquerystorage.googleapis.com",
"bigqueryreservation.googleapis.com",
"cloudkms.googleapis.com",
"compute.googleapis.com",
"servicenetworking.googleapis.com",
"storage.googleapis.com",
"storage-component.googleapis.com",
"dataflow.googleapis.com",
]
#Using Additive IAM to let users use existing project
iam_additive = {
iam = {
# GCS roles
"roles/storage.objectAdmin" = [
module.service-account-df.iam_email,
@ -43,9 +45,10 @@ module "project" {
module.service-account-orch.iam_email,
],
#Bigquery roles
"roles/bigquery.admin" = [
"roles/bigquery.admin" = concat([
module.service-account-orch.iam_email,
]
], var.data_eng_principals
)
"roles/bigquery.dataEditor" = [
module.service-account-df.iam_email,
module.service-account-bq.iam_email

View File

@ -56,6 +56,7 @@ output "command-02-dataflow" {
--subnetwork ${module.vpc.subnets[format("%s/%s", var.region, "subnet")].self_link} \
--staging-location ${module.gcs-df-tmp.url} \
--service-account-email ${module.service-account-df.email} \
${var.cmek_encryption ? format("--dataflow-kms-key=%s", module.kms[0].key_ids.key-df) : ""} \
--parameters \
javascriptTextTransformFunctionName=transform,\
JSONPath=${module.gcs-data.url}/person_schema.json,\
@ -69,7 +70,6 @@ bigQueryLoadingTemporaryDirectory=${module.gcs-df-tmp.url}
# Shell snippet that authenticates while impersonating the BigQuery service
# account, then queries the first 1000 rows of the "person" table.
# The query is wrapped in single quotes so the shell leaves the backticks
# around the table reference alone; no further quoting follows it.
output "command-03-bq" {
  description = "bq command to query imported data."
  value       = <<EOT
gcloud auth application-default login --impersonate-service-account=${module.service-account-bq.email}
bq query --project_id=${module.project.project_id} --use_legacy_sql=false 'SELECT * FROM `${module.project.project_id}.${module.bigquery-dataset.dataset_id}.${module.bigquery-dataset.tables["person"].table_id}` LIMIT 1000'
EOT
}

View File

@ -12,7 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Toggle for customer-managed encryption keys. Disabled unless the caller
# sets it; elsewhere in this commit it gates the kms module and the
# encryption_key arguments.
variable "cmek_encryption" {
  type        = bool
  default     = false
  description = "Flag to enable CMEK on GCP resources created."
}
variable "data_eng_principals" {
description = "Groups with Service Account Tocken creator role on service accounts in iam format 'group:group@domain.com' or 'user:user@domain.com'."