Minimal Data Platform - Make components optional (#1380)
Make some components optional: Composer and the Dataproc history server.
This commit is contained in:
parent
6a89d71e96
commit
40656a23de
|
@ -18,7 +18,7 @@ locals {
|
|||
iam_lnd = {
|
||||
"roles/storage.objectCreator" = [module.land-sa-cs-0.iam_email]
|
||||
"roles/storage.objectViewer" = [module.processing-sa-cmp-0.iam_email]
|
||||
"roles/storage.objectAdmin" = [module.processing-sa-dp-0.iam_email]
|
||||
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -25,10 +25,10 @@ locals {
|
|||
GCP_REGION = var.region
|
||||
LAND_PRJ = module.land-project.project_id
|
||||
LAND_GCS = module.land-cs-0.name
|
||||
PHS_CLUSTER_NAME = module.processing-dp-historyserver.name
|
||||
PHS_CLUSTER_NAME = try(module.processing-dp-historyserver[0].name, null)
|
||||
PROCESSING_GCS = module.processing-cs-0.name
|
||||
PROCESSING_PRJ = module.processing-project.project_id
|
||||
PROCESSING_SA_DP = module.processing-sa-dp-0.email
|
||||
PROCESSING_SA = module.processing-sa-0.email
|
||||
PROCESSING_SUBNET = local.processing_subnet
|
||||
PROCESSING_VPC = local.processing_vpc
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ module "processing-sa-cmp-0" {
|
|||
}
|
||||
|
||||
resource "google_composer_environment" "processing-cmp-0" {
|
||||
count = var.composer_config.disable_deployment == true ? 0 : 1
|
||||
count = var.enable_services.composer == true ? 1 : 0
|
||||
project = module.processing-project.project_id
|
||||
name = "${var.prefix}-prc-cmp-0"
|
||||
region = var.region
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
|
||||
# tfdoc:file:description Cloud Dataproc resources.
|
||||
|
||||
module "processing-cs-dp-history" {
|
||||
module "processing-dp-history" {
|
||||
count = var.enable_services.dataproc_history_server == true ? 1 : 0
|
||||
source = "../../../modules/gcs"
|
||||
project_id = module.processing-project.project_id
|
||||
prefix = var.prefix
|
||||
|
@ -24,12 +25,12 @@ module "processing-cs-dp-history" {
|
|||
encryption_key = var.service_encryption_keys.storage
|
||||
}
|
||||
|
||||
module "processing-sa-dp-0" {
|
||||
module "processing-sa-0" {
|
||||
source = "../../../modules/iam-service-account"
|
||||
project_id = module.processing-project.project_id
|
||||
prefix = var.prefix
|
||||
name = "prc-dp-0"
|
||||
display_name = "Dataproc service account"
|
||||
name = "prc-0"
|
||||
display_name = "Processing service account"
|
||||
iam = {
|
||||
"roles/iam.serviceAccountTokenCreator" = [
|
||||
local.groups_iam.data-engineers,
|
||||
|
@ -41,7 +42,7 @@ module "processing-sa-dp-0" {
|
|||
}
|
||||
}
|
||||
|
||||
module "processing-dp-staging-0" {
|
||||
module "processing-staging-0" {
|
||||
source = "../../../modules/gcs"
|
||||
project_id = module.processing-project.project_id
|
||||
prefix = var.prefix
|
||||
|
@ -51,7 +52,7 @@ module "processing-dp-staging-0" {
|
|||
encryption_key = var.service_encryption_keys.storage
|
||||
}
|
||||
|
||||
module "processing-dp-temp-0" {
|
||||
module "processing-temp-0" {
|
||||
source = "../../../modules/gcs"
|
||||
project_id = module.processing-project.project_id
|
||||
prefix = var.prefix
|
||||
|
@ -61,7 +62,7 @@ module "processing-dp-temp-0" {
|
|||
encryption_key = var.service_encryption_keys.storage
|
||||
}
|
||||
|
||||
module "processing-dp-log-0" {
|
||||
module "processing-log-0" {
|
||||
source = "../../../modules/gcs"
|
||||
project_id = module.processing-project.project_id
|
||||
prefix = var.prefix
|
||||
|
@ -72,19 +73,20 @@ module "processing-dp-log-0" {
|
|||
}
|
||||
|
||||
module "processing-dp-historyserver" {
|
||||
count = var.enable_services.dataproc_history_server == true ? 1 : 0
|
||||
source = "../../../modules/dataproc"
|
||||
project_id = module.processing-project.project_id
|
||||
name = "hystory-server"
|
||||
name = "history-server"
|
||||
prefix = var.prefix
|
||||
region = var.region
|
||||
dataproc_config = {
|
||||
cluster_config = {
|
||||
staging_bucket = module.processing-dp-staging-0.name
|
||||
temp_bucket = module.processing-dp-temp-0.name
|
||||
staging_bucket = module.processing-staging-0.name
|
||||
temp_bucket = module.processing-temp-0.name
|
||||
gce_cluster_config = {
|
||||
subnetwork = module.processing-vpc[0].subnets["${var.region}/${var.prefix}-processing"].self_link
|
||||
zone = "${var.region}-b"
|
||||
service_account = module.processing-sa-dp-0.email
|
||||
service_account = module.processing-sa-0.email
|
||||
service_account_scopes = ["cloud-platform"]
|
||||
internal_ip_only = true
|
||||
}
|
||||
|
@ -99,10 +101,10 @@ module "processing-dp-historyserver" {
|
|||
"dataproc:dataproc.allow.zero.workers" = "true"
|
||||
"dataproc:job.history.to-gcs.enabled" = "true"
|
||||
"spark:spark.history.fs.logDirectory" = (
|
||||
"gs://${module.processing-dp-staging-0.name}/*/spark-job-history"
|
||||
"gs://${module.processing-staging-0.name}/*/spark-job-history"
|
||||
)
|
||||
"spark:spark.eventLog.dir" = (
|
||||
"gs://${module.processing-dp-staging-0.name}/*/spark-job-history"
|
||||
"gs://${module.processing-staging-0.name}/*/spark-job-history"
|
||||
)
|
||||
"spark:spark.history.custom.executor.log.url.applyIncompleteApplication" = "false"
|
||||
"spark:spark.history.custom.executor.log.url" = (
|
||||
|
|
|
@ -28,7 +28,7 @@ locals {
|
|||
module.processing-sa-cmp-0.iam_email
|
||||
]
|
||||
"roles/dataproc.worker" = [
|
||||
module.processing-sa-dp-0.iam_email
|
||||
module.processing-sa-0.iam_email
|
||||
]
|
||||
"roles/iam.serviceAccountUser" = [
|
||||
module.processing-sa-cmp-0.iam_email, local.groups_iam.data-engineers
|
||||
|
|
|
@ -16,13 +16,13 @@
|
|||
|
||||
locals {
|
||||
cur_iam = {
|
||||
"roles/bigquery.dataOwner" = [module.processing-sa-dp-0.iam_email]
|
||||
"roles/bigquery.dataOwner" = [module.processing-sa-0.iam_email]
|
||||
"roles/bigquery.dataViewer" = [
|
||||
local.groups_iam.data-analysts,
|
||||
local.groups_iam.data-engineers
|
||||
]
|
||||
"roles/bigquery.jobUser" = [
|
||||
module.processing-sa-dp-0.iam_email,
|
||||
module.processing-sa-0.iam_email,
|
||||
local.groups_iam.data-analysts,
|
||||
local.groups_iam.data-engineers
|
||||
]
|
||||
|
@ -35,7 +35,7 @@ locals {
|
|||
"roles/storage.objectViewer" = [
|
||||
local.groups_iam.data-analysts, local.groups_iam.data-engineers
|
||||
]
|
||||
"roles/storage.objectAdmin" = [module.processing-sa-dp-0.iam_email]
|
||||
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
|
||||
}
|
||||
cur_services = [
|
||||
"iam.googleapis.com",
|
||||
|
|
|
@ -20,16 +20,16 @@ locals {
|
|||
"roles/dlp.estimatesAdmin" = [local.groups_iam.data-engineers]
|
||||
"roles/dlp.reader" = [local.groups_iam.data-engineers]
|
||||
"roles/dlp.user" = [
|
||||
module.processing-sa-dp-0.iam_email,
|
||||
module.processing-sa-0.iam_email,
|
||||
local.groups_iam.data-engineers
|
||||
]
|
||||
"roles/datacatalog.admin" = [local.groups_iam.data-security]
|
||||
"roles/datacatalog.viewer" = [
|
||||
module.processing-sa-dp-0.iam_email,
|
||||
module.processing-sa-0.iam_email,
|
||||
local.groups_iam.data-analysts
|
||||
]
|
||||
"roles/datacatalog.categoryFineGrainedReader" = [
|
||||
module.processing-sa-dp-0.iam_email
|
||||
module.processing-sa-0.iam_email
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
@ -230,8 +230,8 @@ network_config = {
|
|||
host_project = "PROJECT_ID"
|
||||
network_self_link = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/global/networks/NAME"
|
||||
subnet_self_links = {
|
||||
processing_dataproc = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
|
||||
processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
|
||||
processing_transformation = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
|
||||
processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
|
||||
}
|
||||
composer_ip_ranges = {
|
||||
cloudsql = "192.168.XXX.XXX/24"
|
||||
|
@ -280,29 +280,30 @@ The application layer is out of scope of this script. As a demo purpuse only, on
|
|||
|
||||
| name | description | type | required | default |
|
||||
|---|---|:---:|:---:|:---:|
|
||||
| [organization_domain](variables.tf#L114) | Organization domain. | <code>string</code> | ✓ | |
|
||||
| [prefix](variables.tf#L119) | Prefix used for resource names. | <code>string</code> | ✓ | |
|
||||
| [project_config](variables.tf#L128) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | <code title="object({ billing_account_id = optional(string, null) parent = string project_ids = optional(object({ landing = string processing = string curated = string common = string }), { landing = "lnd" processing = "prc" curated = "cur" common = "cmn" } ) })">object({…})</code> | ✓ | |
|
||||
| [composer_config](variables.tf#L17) | Cloud Composer config. | <code title="object({ disable_deployment = optional(bool, false) environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL") software_config = optional(object({ airflow_config_overrides = optional(map(string), {}) pypi_packages = optional(map(string), {}) env_variables = optional(map(string), {}) image_version = optional(string, "composer-2-airflow-2") }), {}) workloads_config = optional(object({ scheduler = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) count = optional(number, 1) } ), {}) web_server = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) }), {}) worker = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) min_count = optional(number, 1) max_count = optional(number, 3) } ), {}) }), {}) })">object({…})</code> | | <code>{}</code> |
|
||||
| [data_catalog_tags](variables.tf#L55) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> |
|
||||
| [data_force_destroy](variables.tf#L66) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
|
||||
| [groups](variables.tf#L72) | User groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> |
|
||||
| [location](variables.tf#L82) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> |
|
||||
| [network_config](variables.tf#L88) | Shared VPC network configurations to use. If null networks will be created in projects. | <code title="object({ host_project = optional(string) network_self_link = optional(string) subnet_self_links = optional(object({ processing_dataproc = string processing_composer = string }), null) composer_ip_ranges = optional(object({ connection_subnetwork = optional(string) cloud_sql = optional(string, "10.20.10.0/24") gke_master = optional(string, "10.20.11.0/28") pods_range_name = optional(string, "pods") services_range_name = optional(string, "services") }), {}) })">object({…})</code> | | <code>{}</code> |
|
||||
| [project_suffix](variables.tf#L152) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
|
||||
| [region](variables.tf#L158) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> |
|
||||
| [service_encryption_keys](variables.tf#L164) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = optional(string) composer = optional(string) compute = optional(string) storage = optional(string) })">object({…})</code> | | <code>{}</code> |
|
||||
| [organization_domain](variables.tf#L122) | Organization domain. | <code>string</code> | ✓ | |
|
||||
| [prefix](variables.tf#L127) | Prefix used for resource names. | <code>string</code> | ✓ | |
|
||||
| [project_config](variables.tf#L136) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | <code title="object({ billing_account_id = optional(string, null) parent = string project_ids = optional(object({ landing = string processing = string curated = string common = string }), { landing = "lnd" processing = "prc" curated = "cur" common = "cmn" } ) })">object({…})</code> | ✓ | |
|
||||
| [composer_config](variables.tf#L17) | Cloud Composer config. | <code title="object({ environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL") software_config = optional(object({ airflow_config_overrides = optional(map(string), {}) pypi_packages = optional(map(string), {}) env_variables = optional(map(string), {}) image_version = optional(string, "composer-2-airflow-2") }), {}) workloads_config = optional(object({ scheduler = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) count = optional(number, 1) } ), {}) web_server = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) }), {}) worker = optional(object({ cpu = optional(number, 0.5) memory_gb = optional(number, 1.875) storage_gb = optional(number, 1) min_count = optional(number, 1) max_count = optional(number, 3) } ), {}) }), {}) })">object({…})</code> | | <code>{}</code> |
|
||||
| [data_catalog_tags](variables.tf#L54) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map(map(list(string)))</code> | | <code title="{ "3_Confidential" = null "2_Private" = null "1_Sensitive" = null }">{…}</code> |
|
||||
| [data_force_destroy](variables.tf#L65) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
|
||||
| [enable_services](variables.tf#L71) | Flag to enable or disable services in the Data Platform. | <code title="object({ composer = optional(bool, true) dataproc_history_server = optional(bool, true) })">object({…})</code> | | <code>{}</code> |
|
||||
| [groups](variables.tf#L80) | User groups. | <code>map(string)</code> | | <code title="{ data-analysts = "gcp-data-analysts" data-engineers = "gcp-data-engineers" data-security = "gcp-data-security" }">{…}</code> |
|
||||
| [location](variables.tf#L90) | Location used for multi-regional resources. | <code>string</code> | | <code>"eu"</code> |
|
||||
| [network_config](variables.tf#L96) | Shared VPC network configurations to use. If null networks will be created in projects. | <code title="object({ host_project = optional(string) network_self_link = optional(string) subnet_self_links = optional(object({ processing_transformation = string processing_composer = string }), null) composer_ip_ranges = optional(object({ connection_subnetwork = optional(string) cloud_sql = optional(string, "10.20.10.0/24") gke_master = optional(string, "10.20.11.0/28") pods_range_name = optional(string, "pods") services_range_name = optional(string, "services") }), {}) })">object({…})</code> | | <code>{}</code> |
|
||||
| [project_suffix](variables.tf#L160) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
|
||||
| [region](variables.tf#L166) | Region used for regional resources. | <code>string</code> | | <code>"europe-west1"</code> |
|
||||
| [service_encryption_keys](variables.tf#L172) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object({ bq = optional(string) composer = optional(string) compute = optional(string) storage = optional(string) })">object({…})</code> | | <code>{}</code> |
|
||||
|
||||
## Outputs
|
||||
|
||||
| name | description | sensitive |
|
||||
|---|---|:---:|
|
||||
| [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | |
|
||||
| [dataproc-hystory-server](outputs.tf#L24) | List of bucket names which have been assigned to the cluster. | |
|
||||
| [gcs-buckets](outputs.tf#L34) | GCS buckets. | ✓ |
|
||||
| [kms_keys](outputs.tf#L44) | Cloud MKS keys. | |
|
||||
| [projects](outputs.tf#L49) | GCP Projects informations. | |
|
||||
| [vpc_network](outputs.tf#L67) | VPC network. | |
|
||||
| [vpc_subnet](outputs.tf#L75) | VPC subnetworks. | |
|
||||
| [dataproc-history-server](outputs.tf#L24) | List of bucket names which have been assigned to the cluster. | |
|
||||
| [gcs-buckets](outputs.tf#L29) | GCS buckets. | ✓ |
|
||||
| [kms_keys](outputs.tf#L39) | Cloud MKS keys. | |
|
||||
| [projects](outputs.tf#L44) | GCP Projects informations. | |
|
||||
| [vpc_network](outputs.tf#L62) | VPC network. | |
|
||||
| [vpc_subnet](outputs.tf#L70) | VPC subnetworks. | |
|
||||
|
||||
<!-- END TFDOC -->
|
||||
|
|
|
@ -41,9 +41,9 @@ LAND_GCS = os.environ.get("LAND_GCS")
|
|||
PHS_CLUSTER_NAME = os.environ.get("PHS_CLUSTER_NAME")
|
||||
PROCESSING_GCS = os.environ.get("PROCESSING_GCS")
|
||||
PROCESSING_PRJ = os.environ.get("PROCESSING_PRJ")
|
||||
PROCESSING_SA_DP = os.environ.get("PROCESSING_SA_DP")
|
||||
PROCESSING_SA_SUBNET = os.environ.get("PROCESSING_SUBNET")
|
||||
PROCESSING_SA_VPC = os.environ.get("PROCESSING_VPC")
|
||||
PROCESSING_SA = os.environ.get("PROCESSING_SA")
|
||||
PROCESSING_SUBNET = os.environ.get("PROCESSING_SUBNET")
|
||||
PROCESSING_VPC = os.environ.get("PROCESSING_VPC")
|
||||
|
||||
PYTHON_FILE_LOCATION = "gs://"+PROCESSING_GCS+"/pyspark_sort.py"
|
||||
# PHS_CLUSTER_NAME may be missing from the environment when the optional
# Dataproc history server is not deployed; in that case os.environ.get()
# returns None and plain string concatenation would raise TypeError at DAG
# parse time. Build the cluster path only when a name is available.
# NOTE(review): confirm the consumer of PHS_CLUSTER_PATH accepts None.
PHS_CLUSTER_PATH = (
    "projects/"+PROCESSING_PRJ+"/regions/"+DP_REGION+"/clusters/"+PHS_CLUSTER_NAME
    if PHS_CLUSTER_NAME else None
)
|
||||
|
@ -65,8 +65,8 @@ with models.DAG(
|
|||
batch={
|
||||
"environment_config": {
|
||||
"execution_config": {
|
||||
"service_account": PROCESSING_SA_DP,
|
||||
"subnetwork_uri": PROCESSING_SA_SUBNET
|
||||
"service_account": PROCESSING_SA,
|
||||
"subnetwork_uri": PROCESSING_SUBNET
|
||||
},
|
||||
"peripherals_config": {
|
||||
"spark_history_server_config":{
|
||||
|
|
|
@ -21,14 +21,9 @@ output "bigquery-datasets" {
|
|||
}
|
||||
}
|
||||
|
||||
output "dataproc-hystory-server" {
|
||||
output "dataproc-history-server" {
|
||||
description = "List of bucket names which have been assigned to the cluster."
|
||||
value = {
|
||||
bucket_names = module.processing-dp-historyserver.bucket_names
|
||||
http_ports = module.processing-dp-historyserver.http_ports
|
||||
instance_names = module.processing-dp-historyserver.instance_names
|
||||
name = module.processing-dp-historyserver.name
|
||||
}
|
||||
value = one(module.processing-dp-historyserver)
|
||||
}
|
||||
|
||||
output "gcs-buckets" {
|
||||
|
@ -67,15 +62,15 @@ output "projects" {
|
|||
output "vpc_network" {
|
||||
description = "VPC network."
|
||||
value = {
|
||||
processing_dataproc = local.processing_vpc
|
||||
processing_composer = local.processing_vpc
|
||||
processing_transformation = local.processing_vpc
|
||||
processing_composer = local.processing_vpc
|
||||
}
|
||||
}
|
||||
|
||||
output "vpc_subnet" {
|
||||
description = "VPC subnetworks."
|
||||
value = {
|
||||
processing_dataproc = local.processing_subnet
|
||||
processing_composer = local.processing_subnet
|
||||
processing_transformation = local.processing_subnet
|
||||
processing_composer = local.processing_subnet
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,8 +17,7 @@
|
|||
variable "composer_config" {
|
||||
description = "Cloud Composer config."
|
||||
type = object({
|
||||
disable_deployment = optional(bool, false)
|
||||
environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL")
|
||||
environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL")
|
||||
software_config = optional(object({
|
||||
airflow_config_overrides = optional(map(string), {})
|
||||
pypi_packages = optional(map(string), {})
|
||||
|
@ -69,6 +68,15 @@ variable "data_force_destroy" {
|
|||
default = false
|
||||
}
|
||||
|
||||
variable "enable_services" {
|
||||
description = "Flag to enable or disable services in the Data Platform."
|
||||
type = object({
|
||||
composer = optional(bool, true)
|
||||
dataproc_history_server = optional(bool, true)
|
||||
})
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "groups" {
|
||||
description = "User groups."
|
||||
type = map(string)
|
||||
|
@ -91,8 +99,8 @@ variable "network_config" {
|
|||
host_project = optional(string)
|
||||
network_self_link = optional(string)
|
||||
subnet_self_links = optional(object({
|
||||
processing_dataproc = string
|
||||
processing_composer = string
|
||||
processing_transformation = string
|
||||
processing_composer = string
|
||||
}), null)
|
||||
composer_ip_ranges = optional(object({
|
||||
connection_subnetwork = optional(string)
|
||||
|
|
Loading…
Reference in New Issue