diff --git a/examples/data-solutions/README.md b/examples/data-solutions/README.md index 23fabdcc..e91e4873 100644 --- a/examples/data-solutions/README.md +++ b/examples/data-solutions/README.md @@ -36,6 +36,6 @@ This [example](./cloudsql-multiregion/) creates a [Cloud SQL instance](https://c ### Data Playground starter with Cloud Vertex AI Notebook and GCS - -This [example](./data-playground/) creates a [Vertex AI Notebook](https://cloud.google.com/vertex-ai/docs/workbench/introduction) running under a VPC network and a starter GCS bucket to store inputs and outputs of data experiments. + +This [example](./data-playground/) creates a [Vertex AI Notebook](https://cloud.google.com/vertex-ai/docs/workbench/introduction) running on a VPC with a private IP and a dedicated Service Account. A GCS bucket and a BigQuery dataset are created to store inputs and outputs of data experiments.
\ No newline at end of file diff --git a/examples/data-solutions/data-playground/README.md b/examples/data-solutions/data-playground/README.md index fb759695..91a06145 100644 --- a/examples/data-solutions/data-playground/README.md +++ b/examples/data-solutions/data-playground/README.md @@ -1,6 +1,6 @@ # Data Playground -This example creates a minimum viable template for a data experimentation project with the needed APIs enabled, basic VPC and Firewall set in place, GCS bucket and an AI notebook to get started. +This example creates a minimum viable architecture for a data experimentation project with the needed APIs enabled, VPC and Firewall set in place, BigQuery dataset, GCS bucket and an AI notebook to get started. This is the high level diagram: @@ -10,34 +10,58 @@ This is the high level diagram: This sample creates several distinct groups of resources: -- projects - - Service Project configured for GCE instances and GCS buckets +- project - networking - - VPC network - - One default subnet + - VPC network with a default subnet and Cloud NAT - Firewall rules for [SSH access via IAP](https://cloud.google.com/iap/docs/using-tcp-forwarding) and open communication within the VPC -- Vertex AI notebook - - One Jupyter lab notebook instance with public access -- GCS - - One bucket initial bucket +- Vertex AI Workbench notebook configured with a private IP and using a dedicated Service Account +- One GCS bucket +- One BigQuery dataset +## Deploy your environment +We assume the identity running the following steps has the following roles: +- resourcemanager.projectCreator in case a new project will be created. +- owner on the project in case you use an existing project. + +Run Terraform init: +``` +$ terraform init +``` + +Configure the Terraform variables in your terraform.tfvars file. 
You need to specify at least the following variables: +``` +prefix = "prefix" +project_id = "data-001" +``` + +You can now run: +``` +$ terraform apply +``` + +You can now connect to the Vertex AI notebook to perform your data analysis. + ## Variables -| name | description | type | required | default | -| ------------------------- | --------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -------- | ------------ | -| project\_id | Project id, references existing project if \`project\_create\` is null. | string | ✓ | | -| location | The location where resources will be deployed | string | | europe | -| region | The region where resources will be deployed. | string | | europe-west1 | -| project\_create | Provide values if project creation is needed, uses existing project if null. Parent format: folders/folder\_id or organizations/org\_id | object({…}) | | null | -| prefix | Unique prefix used for resource names. Not used for project if 'project\_create' is null. | string | | dp | -| service\_encryption\_keys | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | null | -| vpc\_config | Parameters to create a simple VPC for the Data Playground | object({…}) | | {...} | + +| name | description | type | required | default | +|---|---|:---:|:---:|:---:| +| [prefix](variables.tf#L36) | Unique prefix used for resource names. Not used for project if 'project_create' is null. | string | ✓ | | +| [project_id](variables.tf#L22) | Project id, references existing project if `project_create` is null. | string | ✓ | | +| [location](variables.tf#L16) | The location where resources will be deployed. | string | | "EU" | +| [project_create](variables.tf#L27) | Provide values if project creation is needed, uses existing project if null. 
Parent format: folders/folder_id or organizations/org_id | object({…}) | | null | +| [region](variables.tf#L41) | The region where resources will be deployed. | string | | "europe-west1" | +| [vpc_config](variables.tf#L57) | Parameters to create a VPC. | object({…}) | | {…} | ## Outputs -| Name | Description | -| ----------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------- | -| bucket | GCS Bucket URL. | -| project | Project id | -| vpc | VPC Network name | -| notebook | Vertex AI notebook name | + +| name | description | sensitive | +|---|---|:---:| +| [bucket](outputs.tf#L15) | GCS Bucket URL. | | +| [dataset](outputs.tf#L20) | GCS Bucket URL. | | +| [notebook](outputs.tf#L25) | Vertex AI notebook details. | | +| [project](outputs.tf#L33) | Project id | | +| [vpc](outputs.tf#L38) | VPC Network | | + + diff --git a/examples/data-solutions/data-playground/diagram.png b/examples/data-solutions/data-playground/diagram.png index 9da71fd0..b2d2d8eb 100644 Binary files a/examples/data-solutions/data-playground/diagram.png and b/examples/data-solutions/data-playground/diagram.png differ diff --git a/examples/data-solutions/data-playground/main.tf b/examples/data-solutions/data-playground/main.tf index 43453f5a..b6b23d97 100644 --- a/examples/data-solutions/data-playground/main.tf +++ b/examples/data-solutions/data-playground/main.tf @@ -27,25 +27,33 @@ module "project" { project_create = var.project_create != null prefix = var.project_create == null ? 
null : var.prefix services = [ - "stackdriver.googleapis.com", - "compute.googleapis.com", - "storage-component.googleapis.com", - "storage.googleapis.com", - "servicenetworking.googleapis.com", "bigquery.googleapis.com", "bigquerystorage.googleapis.com", "bigqueryreservation.googleapis.com", + "composer.googleapis.com", + "compute.googleapis.com", "dataflow.googleapis.com", + "ml.googleapis.com", "notebooks.googleapis.com", - "composer.googleapis.com" + "servicenetworking.googleapis.com", + "stackdriver.googleapis.com", + "storage.googleapis.com", + "storage-component.googleapis.com" ] policy_boolean = { # "constraints/compute.requireOsLogin" = false # Example of applying a project wide policy, mainly useful for Composer } service_encryption_key_ids = { + compute = [try(local.service_encryption_keys.compute, null)] + bq = [try(local.service_encryption_keys.bq, null)] storage = [try(local.service_encryption_keys.storage, null)] } + + service_config = { + disable_on_destroy = false, + disable_dependent_services = false + } } ############################################################################### @@ -55,11 +63,11 @@ module "project" { module "vpc" { source = "../../../modules/net-vpc" project_id = module.project.project_id - name = var.vpc_config.vpc_name + name = "${var.prefix}-vpc" subnets = [ { ip_cidr_range = var.vpc_config.ip_cidr_range - name = var.vpc_config.subnet_name + name = "${var.prefix}-subnet" region = var.region secondary_ip_range = {} } @@ -71,27 +79,73 @@ module "vpc-firewall" { project_id = module.project.project_id network = module.vpc.name admin_ranges = [var.vpc_config.ip_cidr_range] + custom_rules = { + #TODO Remove and rely on 'ssh' tag once terraform-provider-google/issues/9273 is fixed + ("${var.prefix}-iap") = { + description = "Enable SSH from IAP on Notebooks." 
+ direction = "INGRESS" + action = "allow" + sources = [] + ranges = ["35.235.240.0/20"] + targets = ["notebook-instance"] + use_service_accounts = false + rules = [{ protocol = "tcp", ports = [22] }] + extra_attributes = {} + } + } +} + +module "cloudnat" { + source = "../../../modules/net-cloudnat" + project_id = module.project.project_id + name = "${var.prefix}-default" + region = var.region + router_network = module.vpc.name } ############################################################################### -# GCS # +# Storage # ############################################################################### -module "base-gcs-bucket" { +module "bucket" { source = "../../../modules/gcs" project_id = module.project.project_id - prefix = module.project.project_id - name = "base" + prefix = var.prefix + location = var.location + name = "data" encryption_key = try(local.service_encryption_keys.storage, null) # Example assignment of an encryption key } +module "dataset" { + source = "../../../modules/bigquery-dataset" + project_id = module.project.project_id + id = "${var.prefix}_data" + encryption_key = try(local.service_encryption_keys.bq, null) # Example assignment of an encryption key +} + ############################################################################### # Vertex AI Notebook # ############################################################################### # TODO: Add encryption_key to Vertex AI notebooks as well # TODO: Add shared VPC support + +module "service-account-notebook" { + source = "../../../modules/iam-service-account" + project_id = module.project.project_id + name = "notebook-sa" + iam_project_roles = { + (module.project.project_id) = [ + "roles/bigquery.admin", + "roles/bigquery.jobUser", + "roles/bigquery.dataEditor", + "roles/bigquery.user", + "roles/storage.admin", + ] + } +} + resource "google_notebooks_instance" "playground" { - name = "data-play-notebook" + name = "${var.prefix}-notebook" location = format("%s-%s", var.region, "b") 
machine_type = "e2-medium" project = module.project.project_id @@ -104,10 +158,17 @@ resource "google_notebooks_instance" "playground" { install_gpu_driver = true boot_disk_type = "PD_SSD" boot_disk_size_gb = 110 + disk_encryption = try(local.service_encryption_keys.compute != null, false) ? "CMEK" : "GMEK" + kms_key = try(local.service_encryption_keys.compute, null) - no_public_ip = false + no_public_ip = true no_proxy_access = false network = module.vpc.network.id - subnet = module.vpc.subnets[format("%s/%s", var.region, var.vpc_config.subnet_name)].id + subnet = module.vpc.subnets[format("%s/%s", var.region, "${var.prefix}-subnet")].id + + service_account = module.service-account-notebook.email + + #TODO Uncomment once terraform-provider-google/issues/9273 is fixed + # tags = ["ssh"] } diff --git a/examples/data-solutions/data-playground/outputs.tf b/examples/data-solutions/data-playground/outputs.tf index 3c47229f..03db2506 100644 --- a/examples/data-solutions/data-playground/outputs.tf +++ b/examples/data-solutions/data-playground/outputs.tf @@ -14,12 +14,20 @@ output "bucket" { description = "GCS Bucket URL." - value = module.base-gcs-bucket.url + value = module.bucket.url +} + +output "dataset" { + description = "GCS Bucket URL." + value = module.dataset.id } output "notebook" { - description = "Vertex AI notebook" - value = resource.google_notebooks_instance.playground.name + description = "Vertex AI notebook details." 
+ value = { + name = resource.google_notebooks_instance.playground.name + id = resource.google_notebooks_instance.playground.id + } } output "project" { @@ -30,4 +38,4 @@ output "project" { output "vpc" { description = "VPC Network" value = module.vpc.name -} \ No newline at end of file +} diff --git a/examples/data-solutions/data-playground/variables.tf b/examples/data-solutions/data-playground/variables.tf index 92f63e2d..76ba0db1 100644 --- a/examples/data-solutions/data-playground/variables.tf +++ b/examples/data-solutions/data-playground/variables.tf @@ -16,7 +16,7 @@ variable "location" { description = "The location where resources will be deployed." type = string - default = "europe" + default = "EU" } variable "project_id" { @@ -36,7 +36,6 @@ variable "project_create" { variable "prefix" { description = "Unique prefix used for resource names. Not used for project if 'project_create' is null." type = string - default = "dp" } variable "region" { @@ -48,21 +47,19 @@ variable "region" { variable "service_encryption_keys" { # service encription key description = "Cloud KMS to use to encrypt different services. Key location should match service region." type = object({ + bq = string + compute = string storage = string }) default = null } variable "vpc_config" { - description = "Parameters to create a simple VPC for the Data Playground" + description = "Parameters to create a VPC." 
type = object({ ip_cidr_range = string - subnet_name = string - vpc_name = string }) default = { ip_cidr_range = "10.0.0.0/20" - subnet_name = "default-subnet" - vpc_name = "data-playground-vpc" } -} \ No newline at end of file +} diff --git a/tests/examples/data_solutions/data_playground/fixture/main.tf b/tests/examples/data_solutions/data_playground/fixture/main.tf index 8082f997..b52d9b8f 100644 --- a/tests/examples/data_solutions/data_playground/fixture/main.tf +++ b/tests/examples/data_solutions/data_playground/fixture/main.tf @@ -17,6 +17,7 @@ module "test" { source = "../../../../../examples/data-solutions/data-playground/" project_id = "sampleproject" + prefix = "tst" project_create = { billing_account_id = "123456-123456-123456", parent = "folders/467898377" diff --git a/tests/examples/data_solutions/data_playground/test_plan.py b/tests/examples/data_solutions/data_playground/test_plan.py index 1807e3de..05bda08c 100644 --- a/tests/examples/data_solutions/data_playground/test_plan.py +++ b/tests/examples/data_solutions/data_playground/test_plan.py @@ -22,5 +22,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture') def test_resources(e2e_plan_runner): "Test that plan works and the numbers of resources is as expected." modules, resources = e2e_plan_runner(FIXTURES_DIR) - assert len(modules) == 4 - assert len(resources) == 23 + assert len(modules) == 7 + assert len(resources) == 34