feat: adding `max-retries` limit to preupgrade retry limit (#10137)

<!--
The default pull request template is for types feat, fix, or refactor.
For other templates, add one of the following parameters to the url:
- template=docs.md
- template=other.md
-->

## Description

Ref: #9973, #10056 

<!-- Add a description of the changes that this PR introduces and the files that
are the most critical to review. -->

* Skipping backup when skipping upgrade at specified height
* adding env var `PREUPGRADE_MAX_RETRIES` which specifies the limit on how many times to retry preupgrade on error code `31`

---

### Author Checklist

*All items are required. Please add a note to the item if the item is not applicable and
please add links to any relevant follow up issues.*

I have...

- [ ] included the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title
- [ ] added `!` to the type prefix if API or client breaking change
- [ ] targeted the correct branch (see [PR Targeting](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#pr-targeting))
- [ ] provided a link to the relevant issue or specification
- [ ] followed the guidelines for [building modules](https://github.com/cosmos/cosmos-sdk/blob/master/docs/building-modules)
- [ ] included the necessary unit and integration [tests](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#testing)
- [ ] added a changelog entry to `CHANGELOG.md`
- [ ] included comments for [documenting Go code](https://blog.golang.org/godoc)
- [ ] updated the relevant documentation or specification
- [ ] reviewed "Files changed" and left comments if necessary
- [ ] confirmed all CI checks have passed

### Reviewers Checklist

*All items are required. Please add a note if the item is not applicable and please add
your handle next to the items reviewed if you only reviewed selected items.*

I have...

- [ ] confirmed the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title
- [ ] confirmed `!` in the type prefix if API or client breaking change
- [ ] confirmed all author checklist items have been addressed 
- [ ] reviewed state machine logic
- [ ] reviewed API design and naming
- [ ] reviewed documentation is accurate
- [ ] reviewed tests and test coverage
- [ ] manually tested (if applicable)
This commit is contained in:
Spoorthi Satheesha 2021-09-23 21:01:10 +05:30 committed by GitHub
parent 0ad9b4e4fe
commit f2652874e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 33 deletions

View File

@ -41,7 +41,8 @@ Ref: https://keepachangelog.com/en/1.0.0/
+ [\#8590](https://github.com/cosmos/cosmos-sdk/pull/8590) File watcher for cosmovisor. Instead of parsing logs from stdin and stderr, we watch the `<DAEMON_HOME>/data/upgrade-info.json` file updates using polling mechanism.
+ [\#10128](https://github.com/cosmos/cosmos-sdk/pull/10128) Change default value of `DAEMON_RESTART_AFTER_UPGRADE` to `true`.
+ [\#9999](https://github.com/cosmos/cosmos-sdk/issues/9999) Added `version` command that returns the cosmovisor version and the application version.
+ [\#9999](https://github.com/cosmos/cosmos-sdk/pull/10103) Added `version` command that returns the cosmovisor version and the application version.
+ [\#9973](https://github.com/cosmos/cosmos-sdk/pull/10056) Added support for pre-upgrade command in Cosmovisor to be called before the binary is upgraded. Added new environmental variable `DAEMON_PREUPGRADE_MAX_RETRIES` that holds the maximum number of times to reattempt pre-upgrade before failing.
### Improvements

View File

@ -43,6 +43,8 @@ All arguments passed to `cosmovisor` will be passed to the application binary (a
* `DAEMON_RESTART_AFTER_UPGRADE` (*optional*, default = `true`), if `true`, restarts the subprocess with the same command-line arguments and flags (but with the new binary) after a successful upgrade. Otherwise (`false`), `cosmovisor` stops running after an upgrade and requires the system administrator to manually restart it. Note restart is only after the upgrade and does not auto-restart the subprocess after an error occurs.
* `DAEMON_POLL_INTERVAL` is the interval length in milliseconds for polling the upgrade plan file. Default: 300.
* `UNSAFE_SKIP_BACKUP` (defaults to `false`), if set to `false`, backs up the data before trying the upgrade. Otherwise (`true`), upgrades directly without performing a backup. The default value of false is useful and recommended in case of failures and when a backup needed to rollback. We recommend using the default backup option `UNSAFE_SKIP_BACKUP=false`.
* `DAEMON_PREUPGRADE_MAX_RETRIES` (defaults to `0`). The maximum number of times to call `pre-upgrade` in the application after exit status of `31`. After the maximum number of retries, cosmovisor fails the upgrade.
### Folder Layout

View File

@ -15,12 +15,13 @@ import (
// environment variable names
const (
envHome = "DAEMON_HOME"
envName = "DAEMON_NAME"
envDownloadBin = "DAEMON_ALLOW_DOWNLOAD_BINARIES"
envRestartUpgrade = "DAEMON_RESTART_AFTER_UPGRADE"
envSkipBackup = "UNSAFE_SKIP_BACKUP"
envInterval = "DAEMON_POLL_INTERVAL"
envHome = "DAEMON_HOME"
envName = "DAEMON_NAME"
envDownloadBin = "DAEMON_ALLOW_DOWNLOAD_BINARIES"
envRestartUpgrade = "DAEMON_RESTART_AFTER_UPGRADE"
envSkipBackup = "UNSAFE_SKIP_BACKUP"
envInterval = "DAEMON_POLL_INTERVAL"
envPreupgradeMaxRetries = "DAEMON_PREUPGRADE_MAX_RETRIES"
)
const (
@ -42,6 +43,7 @@ type Config struct {
RestartAfterUpgrade bool
PollInterval time.Duration
UnsafeSkipBackup bool
PreupgradeMaxRetries int
// currently running upgrade
currentUpgrade UpgradeInfo
@ -146,6 +148,12 @@ func GetConfigFromEnv() (*Config, error) {
if err := cfg.validate(); err != nil {
return nil, err
}
envPreupgradeMaxRetriesVal := os.Getenv(envPreupgradeMaxRetries)
if cfg.PreupgradeMaxRetries, err = strconv.Atoi(envPreupgradeMaxRetriesVal); err != nil && envPreupgradeMaxRetriesVal != "" {
return nil, fmt.Errorf("%s could not be parsed to int: %w", envPreupgradeMaxRetries, err)
}
return cfg, nil
}

View File

@ -61,13 +61,13 @@ func (l Launcher) Run(args []string, stdout, stderr io.Writer) (bool, error) {
if err != nil || !needsUpdate {
return false, err
}
if err := doBackup(l.cfg); err != nil {
return false, err
}
if !SkipUpgrade(args, l.fw.currentInfo) {
err = doPreUpgrade(l.cfg)
if err != nil {
if !IsSkipUpgradeHeight(args, l.fw.currentInfo) {
if err := doBackup(l.cfg); err != nil {
return false, err
}
if err = doPreUpgrade(l.cfg); err != nil {
return false, err
}
}
@ -152,32 +152,52 @@ func doBackup(cfg *Config) error {
return nil
}
// doPreUpgrade runs the pre-upgrade command defined by the application
// doPreUpgrade runs the pre-upgrade command defined by the application and handles respective error codes
// cfg contains the cosmovisor config from env var
func doPreUpgrade(cfg *Config) error {
bin, err := cfg.CurrentBin()
preUpgradeCmd := exec.Command(bin, "pre-upgrade")
counter := 0
for {
if counter > cfg.PreupgradeMaxRetries {
return fmt.Errorf("pre-upgrade command failed. reached max attempt of retries - %d", cfg.PreupgradeMaxRetries)
}
_, err = preUpgradeCmd.Output()
err := executePreUpgradeCmd(cfg)
counter += 1
if err != nil {
if err.(*exec.ExitError).ProcessState.ExitCode() == 1 {
fmt.Println("pre-upgrade command does not exist. continuing the upgrade.")
return nil
}
if err.(*exec.ExitError).ProcessState.ExitCode() == 30 {
return fmt.Errorf("pre-upgrade command failed : %w", err)
}
if err.(*exec.ExitError).ProcessState.ExitCode() == 31 {
fmt.Println("pre-upgrade command failed. retrying.")
return doPreUpgrade(cfg)
if err != nil {
if err.(*exec.ExitError).ProcessState.ExitCode() == 1 {
fmt.Println("pre-upgrade command does not exist. continuing the upgrade.")
return nil
}
if err.(*exec.ExitError).ProcessState.ExitCode() == 30 {
return fmt.Errorf("pre-upgrade command failed : %w", err)
}
if err.(*exec.ExitError).ProcessState.ExitCode() == 31 {
fmt.Println("pre-upgrade command failed. retrying. attempt:", counter)
fmt.Println(err)
continue
}
}
fmt.Println("pre-upgrade successful. continuing the upgrade.")
return nil
}
fmt.Println("pre-upgrade successful. continuing the upgrade.")
return nil
}
// skipUpgrade checks if pre-upgrade script must be run. If the height in the upgrade plan matches any of the heights provided in --safe-skip-upgrade, the script is not run
func SkipUpgrade(args []string, upgradeInfo UpgradeInfo) bool {
// executePreUpgradeCmd runs the pre-upgrade command defined by the application
// cfg contains the cosmosvisor config from the env vars
func executePreUpgradeCmd(cfg *Config) error {
bin, err := cfg.CurrentBin()
if err != nil {
return err
}
preUpgradeCmd := exec.Command(bin, "pre-upgrade")
_, err = preUpgradeCmd.Output()
return err
}
// IsSkipUpgradeHeight checks if pre-upgrade script must be run. If the height in the upgrade plan matches any of the heights provided in --safe-skip-upgrade, the script is not run
func IsSkipUpgradeHeight(args []string, upgradeInfo UpgradeInfo) bool {
skipUpgradeHeights := UpgradeSkipHeights(args)
for _, h := range skipUpgradeHeights {
if h == int(upgradeInfo.Height) {

View File

@ -160,7 +160,7 @@ func TestSkipUpgrade(t *testing.T) {
for i := range cases {
tc := cases[i]
require := require.New(t)
h := cosmovisor.SkipUpgrade(tc.args, tc.upgradeInfo)
h := cosmovisor.IsSkipUpgradeHeight(tc.args, tc.upgradeInfo)
require.Equal(h, tc.expectRes)
}
}