From 2bb9e3f64e9929ec59851102dd91714bec98cee2 Mon Sep 17 00:00:00 2001 From: Luca Prete Date: Mon, 28 Mar 2022 09:43:00 +0200 Subject: [PATCH] [#597] Fix link_checker --- CHANGELOG.md | 2 +- fast/stages/02-networking-nva/README.md | 6 +- fast/stages/02-networking-peering/README.md | 4 +- fast/stages/02-networking-vpn/README.md | 4 +- fast/stages/03-project-factory/dev/README.md | 2 +- modules/net-glb/README.md | 10 ++-- tools/check_links.py | 58 ++++++++++++++------ 7 files changed, 55 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f3be053..9d771a3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ All notable changes to this project will be documented in this file. - **incompatible change** the variable for service identities IAM has changed in the project factory - add `data-catalog-policy-tag` module - new [workload identity federetion example](examples/cloud-operations/workload-identity-federation) -- new `api-gateway` [module](/modules/api-gateway) and [example](examples/serverless/api-gateway). +- new `api-gateway` [module](./modules/api-gateway) and [example](examples/serverless/api-gateway). - **incompatible change** the `psn_ranges` variable has been renamed to `psa_ranges` in the `net-vpc` module and its type changed from `list(string)` to `map(string)` - **incompatible change** removed `iam` flag for organization and folder level sinks - **incompatible change** removed `ingress_settings` configuration option in the `cloud-functions` module. diff --git a/fast/stages/02-networking-nva/README.md b/fast/stages/02-networking-nva/README.md index 52ea03e8..a7637e63 100644 --- a/fast/stages/02-networking-nva/README.md +++ b/fast/stages/02-networking-nva/README.md @@ -300,7 +300,7 @@ Subnets created using the `net-vpc` module are PGA-enabled by default. - 199.36.153.4/30 (`restricted.googleapis.com`) and 199.36.153.8/30 (`private.googleapis.com`) should be routed from on-premises to the trusted landing VPC, and from there to the `default-internet-gateway`. \ The `vpn_onprem_configs` variable contains the ranges advertised from GCP to on-premises. Furthermore, the trusted landing VPC (e.g. see `landing-trusted-vpc` in [`landing.tf`](./landing.tf)) has explicit routes to send traffic destined to restricted and private - googleapis.com to the Internet gateway (which works for Google APIs only, and not for the whole Internet, since Cloud NAT is not configured in the trusted landing VPC). -- On-premises, a private DNS zone for `googleapis.com` should be created and configured per [this article](https://cloud.google.com/vpc/docs/configure-private-google-access-hybrid#config-domain). Its configuration can be copied from the module `googleapis-private-zone` in [`dns.tf`](./dns.tf) +- On-premises, a private DNS zone for `googleapis.com` should be created and configured per [this article](https://cloud.google.com/vpc/docs/configure-private-google-access-hybrid#config-domain). Its configuration can be copied from the module `googleapis-private-zone` in [`dns-landing.tf`](./dns-landing.tf) ### Preliminar activities @@ -336,9 +336,7 @@ Copy `vpc-peering-prod.tf` to `vpc-peering-staging.tf` and replace "prod" with " Configure the NVAs deployed or update the sample [NVA config file](data/nva-startup-script.tftpl) making sure they support the new subnets. -DNS configurations are managed in the `dns-*.tf` files. -Copy the `dns-prod.tf` to `dns-staging.tf` and replace within the files "prod" with "staging", where relevant. -Don't forget to add a peering zone in the landing project and point it to the newly created environment private zone. +DNS configurations are centralised in the `dns-*.tf` files. Spokes delegate DNS resolution to Landing through DNS peering, and optionally define a private zone (e.g. `dev.gcp.example.com`) which the landing peers to. To configure DNS for a new environment, copy one of the other environments DNS files (e.g. (dns-dev.tf)[dns-dev.tf]) into a new `dns-*.tf` file suffixed with the environment name (e.g. `dns-staging.tf`), and update its content accordingly. Don't forget to add a peering zone from the landing to the newly created environment private zone. diff --git a/fast/stages/02-networking-peering/README.md b/fast/stages/02-networking-peering/README.md index 88682428..30b6b483 100644 --- a/fast/stages/02-networking-peering/README.md +++ b/fast/stages/02-networking-peering/README.md @@ -228,7 +228,7 @@ Subnets created by the `net-vpc` module are PGA-enabled by default. - 199.36.153.4/30 (`restricted.googleapis.com`) and 199.36.153.8/30 (`private.googleapis.com`) should be routed from on-prem to VPC, and from there to the `default-internet-gateway`. \ Per variable `vpn_onprem_configs` such ranges are advertised to onprem - furthermore every VPC (e.g. see `landing-vpc` in [`landing.tf`](./landing.tf)) has explicit routes set in case the `0.0.0.0/0` route is changed. -- A private DNS zone for `googleapis.com` should be created and configured per [this article](https://cloud.google.com/vpc/docs/configure-private-google-access-hybrid#config-domain), as implemented in module `googleapis-private-zone` in [`dns.tf`](./dns.tf) +- A private DNS zone for `googleapis.com` should be created and configured per [this article](https://cloud.google.com/vpc/docs/configure-private-google-access-hybrid#config-domain), as implemented in module `googleapis-private-zone` in [`dns-landing.tf`](./dns-landing.tf) ### Preliminar activities @@ -259,7 +259,7 @@ The new VPC requires a set of dedicated CIDRs, one per region, added to variable > Variables managing L7 Interal Load Balancers (`l7ilb_subnets`) and Private Service Access (`psa_ranges`) should also be adapted, and subnets and firewall rules for the new spoke should be added as described above. -DNS configurations are centralised in the `dns.tf` file. Spokes delegate DNS resolution to Landing through DNS peering, and optionally define a private zone (e.g. `staging.gcp.example.com`) which the landing peers to. To configure DNS for a new environment, copy all the `prod-*` modules in the `dns.tf` file to `staging-*`, and update their content accordingly. Don't forget to add a peering zone from Landing to the newly created environment private zone. +DNS configurations are centralised in the `dns-*.tf` files. Spokes delegate DNS resolution to Landing through DNS peering, and optionally define a private zone (e.g. `dev.gcp.example.com`) which the landing peers to. To configure DNS for a new environment, copy one of the other environments DNS files (e.g. (dns-dev.tf)[dns-dev.tf]) into a new `dns-*.tf` file suffixed with the environment name (e.g. `dns-staging.tf`), and update its content accordingly. Don't forget to add a peering zone from the landing to the newly created environment private zone. diff --git a/fast/stages/02-networking-vpn/README.md b/fast/stages/02-networking-vpn/README.md index d611aa42..15df423a 100644 --- a/fast/stages/02-networking-vpn/README.md +++ b/fast/stages/02-networking-vpn/README.md @@ -242,7 +242,7 @@ Subnets created by the `net-vpc` module are PGA-enabled by default. - 199.36.153.4/30 (`restricted.googleapis.com`) and 199.36.153.8/30 (`private.googleapis.com`) should be routed from on-prem to VPC, and from there to the `default-internet-gateway`. \ Per variable `vpn_onprem_configs` such ranges are advertised to onprem - furthermore every VPC (e.g. see `landing-vpc` in [`landing.tf`](./landing.tf)) has explicit routes set in case the `0.0.0.0/0` route is changed. -- A private DNS zone for `googleapis.com` should be created and configured per [this article](https://cloud.google.com/vpc/docs/configure-private-google-access-hybrid#config-domain), as implemented in module `googleapis-private-zone` in [`dns.tf`](./dns.tf) +- A private DNS zone for `googleapis.com` should be created and configured per [this article](https://cloud.google.com/vpc/docs/configure-private-google-access-hybrid#config-domain), as implemented in module `googleapis-private-zone` in [dns-landing.tf](./dns-landing.tf) ### Preliminar activities @@ -282,7 +282,7 @@ VPN configuration also controls BGP advertisements, which requires the following - `vpn_onprem_configs` to configure the new advertisments to on-premises for the new CIDRs - `vpn_spoke_configs` to configure the new advertisements to `landing` for the new VPC - new keys (one per region) should be added, such as e.g. `staging-ew1` and `staging-ew4` -DNS configurations are centralised in the `dns.tf` file. Spokes delegate DNS resolution to Landing through DNS peering, and optionally define a private zone (e.g. `staging.gcp.example.com`) which the landing peers to. To configure DNS for a new environment, copy all the `prod-*` modules in the `dns.tf` file to `staging-*`, and update their content accordingly. Don't forget to add a peering zone from Landing to the newly created environment private zone. +DNS configurations are centralised in the `dns-*.tf` files. Spokes delegate DNS resolution to Landing through DNS peering, and optionally define a private zone (e.g. `dev.gcp.example.com`) which the landing peers to. To configure DNS for a new environment, copy one of the other environments DNS files (e.g. (dns-dev.tf)[dns-dev.tf]) into a new `dns-*.tf` file suffixed with the environment name (e.g. `dns-staging.tf`), and update its content accordingly. Don't forget to add a peering zone from the landing to the newly created environment private zone. diff --git a/fast/stages/03-project-factory/dev/README.md b/fast/stages/03-project-factory/dev/README.md index ba505614..4722b359 100644 --- a/fast/stages/03-project-factory/dev/README.md +++ b/fast/stages/03-project-factory/dev/README.md @@ -84,7 +84,7 @@ If you're not using Fast, refer to the [Variables](#variables) table at the bott Besides the values above, a project factory takes 2 additional inputs: - `data/defaults.yaml`, manually configured by adapting the [`data/defaults.yaml`](./data/defaults.yaml), which defines per-environment default values e.g., for billing alerts and labels. -- `data/projects/*.yaml`, one file per project (optionally grouped in folders), which configures each project. A [`data/projects/project.yaml`](./data/projects/project.yaml) is provided as reference and documentation for the schema. Projects will be named after the filename, e.g., `fast-dev-lab0.yaml` will create project `fast-dev-lab0`. +- `data/projects/*.yaml`, one file per project (optionally grouped in folders), which configures each project. A [`data/projects/project.yaml`](./data/projects/project.yaml.sample) is provided as reference and documentation for the schema. Projects will be named after the filename, e.g., `fast-dev-lab0.yaml` will create project `fast-dev-lab0`. Once the configuration is complete, run the project factory by running diff --git a/modules/net-glb/README.md b/modules/net-glb/README.md index e59fee2a..a947c666 100644 --- a/modules/net-glb/README.md +++ b/modules/net-glb/README.md @@ -486,11 +486,11 @@ resource "tls_self_signed_cert" "self_signed_cert" { An External Global Load Balancer is made of multiple components, that change depending on the configurations. Sometimes, it may be tricky to understand what they are, and how they relate to each other. Following, we provide a very brief overview to become more familiar with them. -- The global load balancer [forwarding rule](global_forwarding_rule.tf) binds a frontend public Virtual IP (VIP) to an HTTP(S) [target proxy](target_proxy.tf). -- If the target proxy is HTTPS, it requires one or more managed or unmanaged [SSL certificates](ssl_certificates.tf). -- Target proxies leverage [url-maps](url_map.tf): set of L7 rules, which create a mapping between specific hostnames, URIs (and more) to one or more [backends services](backend_services.tf). -- [Backend services](backend_services.tf) can either link to a bucket or one or multiple groups, which can be GCE instance groups or NEGs. It is assumed in this module that buckets and groups are previously created through other modules, and passed in as input variables. -- Backend services support one or more [health checks](health_checks.tf), used to verify that the backend is indeed healthy, so that traffic can be forwarded to it. Health checks currently supported in this module are HTTP, HTTPS, HTTP2, SSL, TCP. +- The global load balancer [forwarding rule](global-forwarding-rule.tf) binds a frontend public Virtual IP (VIP) to an HTTP(S) [target proxy](target-proxy.tf). +- If the target proxy is HTTPS, it requires one or more managed or unmanaged [SSL certificates](ssl-certificates.tf). +Target proxies leverage [url-maps](url-map.tf): set of L7 rules, which create a mapping between specific hostnames, URIs (and more) to one or more [backends services](backend-services.tf). +- [Backend services](backend-services.tf) can either link to a bucket or one or multiple groups, which can be GCE instance groups or NEGs. It is assumed in this module that buckets and groups are previously created through other modules, and passed in as input variables. +- Backend services support one or more [health checks](health-checks.tf), used to verify that the backend is indeed healthy, so that traffic can be forwarded to it. Health checks currently supported in this module are HTTP, HTTPS, HTTP2, SSL, TCP. diff --git a/tools/check_links.py b/tools/check_links.py index 8fc57a9a..c79bca9e 100755 --- a/tools/check_links.py +++ b/tools/check_links.py @@ -29,35 +29,61 @@ import marko BASEDIR = pathlib.Path(__file__).resolve().parents[1] DOC = collections.namedtuple('DOC', 'path relpath links') LINK = collections.namedtuple('LINK', 'dest valid') +OBJS_EXPAND = (marko.block.List, marko.block.ListItem, marko.block.Paragraph) +OBJS_LINK = marko.inline.Link + + +def check_link(link, readme_path): + 'Checks if a link element has a valid destination.' + link_valid = None + url = urllib.parse.urlparse(link.dest) + if url.scheme: + # TODO: worth checking if the call returns 404, 403, 500 + link_valid = True + else: + link_valid = (readme_path.parent / url.path).exists() + return LINK(link.dest, link_valid) + + +def check_elements(elements, readme_path): + 'Recursively finds and checks links in a list of elements.' + if len(elements) == 0: + return [] + + el = elements[0] + + # If there is one element, check the link, + # expand it (if possible), return [] otherwise + if len(elements) == 1: + if isinstance(el, OBJS_LINK): + return [check_link(el, readme_path)] + if isinstance(el, OBJS_EXPAND): + return check_elements(el.children, readme_path) + return [] + + # If there is more than one element call recursively: + # concatenate call on the first element and call on all other elements + if len(elements) > 1: + link_in_first_element = check_elements([el], readme_path) + link_in_other_elements = check_elements(elements[1:len(elements)], readme_path) + return link_in_first_element + link_in_other_elements def check_docs(dir_name): - 'Traverse dir_name and check links in Markdown files.' + 'Traverses dir_name and checks for all Markdown files.' dir_path = BASEDIR / dir_name for readme_path in sorted(dir_path.glob('**/*.md')): if '.terraform' in str(readme_path) or '.pytest' in str(readme_path): continue - links = [] - for el in marko.parser.Parser().parse(readme_path.read_text()).children: - if not isinstance(el, marko.block.Paragraph): - continue - for subel in el.children: - if not isinstance(subel, marko.inline.Link): - continue - link_valid = None - url = urllib.parse.urlparse(subel.dest) - if url.scheme: - link_valid = True - else: - link_valid = (readme_path.parent / url.path).exists() - links.append(LINK(subel.dest, link_valid)) + els = marko.parser.Parser().parse(readme_path.read_text()).children + links = check_elements(els, readme_path) yield DOC(readme_path, str(readme_path.relative_to(dir_path)), links) @click.command() @click.argument('dirs', type=str, nargs=-1) def main(dirs): - 'Check links in Markdown files contained in dirs.' + 'Checks links in Markdown files contained in dirs.' errors = 0 for dir_name in dirs: print(f'----- {dir_name} -----')