feat: adding `max-retries` limit to preupgrade retry limit (#10137)
<!-- The default pull request template is for types feat, fix, or refactor. For other templates, add one of the following parameters to the url: - template=docs.md - template=other.md --> ## Description Ref: #9973, #10056 <!-- Add a description of the changes that this PR introduces and the files that are the most critical to review. --> * Skipping backup when skipping upgrade at specified height * adding env var `PREUPGRADE_MAX_RETRIES` which specifies the limit on how many times to retry preupgrade on error code `31` --- ### Author Checklist *All items are required. Please add a note to the item if the item is not applicable and please add links to any relevant follow up issues.* I have... - [ ] included the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [ ] added `!` to the type prefix if API or client breaking change - [ ] targeted the correct branch (see [PR Targeting](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#pr-targeting)) - [ ] provided a link to the relevant issue or specification - [ ] followed the guidelines for [building modules](https://github.com/cosmos/cosmos-sdk/blob/master/docs/building-modules) - [ ] included the necessary unit and integration [tests](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#testing) - [ ] added a changelog entry to `CHANGELOG.md` - [ ] included comments for [documenting Go code](https://blog.golang.org/godoc) - [ ] updated the relevant documentation or specification - [ ] reviewed "Files changed" and left comments if necessary - [ ] confirmed all CI checks have passed ### Reviewers Checklist *All items are required. Please add a note if the item is not applicable and please add your handle next to the items reviewed if you only reviewed selected items.* I have... - [ ] confirmed the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [ ] confirmed `!` in the type prefix if API or client breaking change - [ ] confirmed all author checklist items have been addressed - [ ] reviewed state machine logic - [ ] reviewed API design and naming - [ ] reviewed documentation is accurate - [ ] reviewed tests and test coverage - [ ] manually tested (if applicable)
This commit is contained in:
parent
0ad9b4e4fe
commit
f2652874e6
|
@ -41,7 +41,8 @@ Ref: https://keepachangelog.com/en/1.0.0/
|
|||
|
||||
+ [\#8590](https://github.com/cosmos/cosmos-sdk/pull/8590) File watcher for cosmovisor. Instead of parsing logs from stdin and stderr, we watch the `<DAEMON_HOME>/data/upgrade-info.json` file updates using polling mechanism.
|
||||
+ [\#10128](https://github.com/cosmos/cosmos-sdk/pull/10128) Change default value of `DAEMON_RESTART_AFTER_UPGRADE` to `true`.
|
||||
+ [\#9999](https://github.com/cosmos/cosmos-sdk/issues/9999) Added `version` command that returns the cosmovisor version and the application version.
|
||||
+ [\#9999](https://github.com/cosmos/cosmos-sdk/pull/10103) Added `version` command that returns the cosmovisor version and the application version.
|
||||
+ [\#9973](https://github.com/cosmos/cosmos-sdk/pull/10056) Added support for pre-upgrade command in Cosmovisor to be called before the binary is upgraded. Added new environmental variable `DAEMON_PREUPGRADE_MAX_RETRIES` that holds the maximum number of times to reattempt pre-upgrade before failing.
|
||||
|
||||
|
||||
### Improvements
|
||||
|
|
|
@ -43,6 +43,8 @@ All arguments passed to `cosmovisor` will be passed to the application binary (a
|
|||
* `DAEMON_RESTART_AFTER_UPGRADE` (*optional*, default = `true`), if `true`, restarts the subprocess with the same command-line arguments and flags (but with the new binary) after a successful upgrade. Otherwise (`false`), `cosmovisor` stops running after an upgrade and requires the system administrator to manually restart it. Note restart is only after the upgrade and does not auto-restart the subprocess after an error occurs.
|
||||
* `DAEMON_POLL_INTERVAL` is the interval length in milliseconds for polling the upgrade plan file. Default: 300.
|
||||
* `UNSAFE_SKIP_BACKUP` (defaults to `false`), if set to `false`, backs up the data before trying the upgrade. Otherwise (`true`), upgrades directly without performing a backup. The default value of false is useful and recommended in case of failures and when a backup needed to rollback. We recommend using the default backup option `UNSAFE_SKIP_BACKUP=false`.
|
||||
* `DAEMON_PREUPGRADE_MAX_RETRIES` (defaults to `0`). The maximum number of times to call `pre-upgrade` in the application after exit status of `31`. After the maximum number of retries, cosmovisor fails the upgrade.
|
||||
|
||||
|
||||
### Folder Layout
|
||||
|
||||
|
|
|
@ -15,12 +15,13 @@ import (
|
|||
|
||||
// environment variable names
|
||||
const (
|
||||
envHome = "DAEMON_HOME"
|
||||
envName = "DAEMON_NAME"
|
||||
envDownloadBin = "DAEMON_ALLOW_DOWNLOAD_BINARIES"
|
||||
envRestartUpgrade = "DAEMON_RESTART_AFTER_UPGRADE"
|
||||
envSkipBackup = "UNSAFE_SKIP_BACKUP"
|
||||
envInterval = "DAEMON_POLL_INTERVAL"
|
||||
envHome = "DAEMON_HOME"
|
||||
envName = "DAEMON_NAME"
|
||||
envDownloadBin = "DAEMON_ALLOW_DOWNLOAD_BINARIES"
|
||||
envRestartUpgrade = "DAEMON_RESTART_AFTER_UPGRADE"
|
||||
envSkipBackup = "UNSAFE_SKIP_BACKUP"
|
||||
envInterval = "DAEMON_POLL_INTERVAL"
|
||||
envPreupgradeMaxRetries = "DAEMON_PREUPGRADE_MAX_RETRIES"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -42,6 +43,7 @@ type Config struct {
|
|||
RestartAfterUpgrade bool
|
||||
PollInterval time.Duration
|
||||
UnsafeSkipBackup bool
|
||||
PreupgradeMaxRetries int
|
||||
|
||||
// currently running upgrade
|
||||
currentUpgrade UpgradeInfo
|
||||
|
@ -146,6 +148,12 @@ func GetConfigFromEnv() (*Config, error) {
|
|||
if err := cfg.validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
envPreupgradeMaxRetriesVal := os.Getenv(envPreupgradeMaxRetries)
|
||||
if cfg.PreupgradeMaxRetries, err = strconv.Atoi(envPreupgradeMaxRetriesVal); err != nil && envPreupgradeMaxRetriesVal != "" {
|
||||
return nil, fmt.Errorf("%s could not be parsed to int: %w", envPreupgradeMaxRetries, err)
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -61,13 +61,13 @@ func (l Launcher) Run(args []string, stdout, stderr io.Writer) (bool, error) {
|
|||
if err != nil || !needsUpdate {
|
||||
return false, err
|
||||
}
|
||||
if err := doBackup(l.cfg); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if !SkipUpgrade(args, l.fw.currentInfo) {
|
||||
err = doPreUpgrade(l.cfg)
|
||||
if err != nil {
|
||||
if !IsSkipUpgradeHeight(args, l.fw.currentInfo) {
|
||||
if err := doBackup(l.cfg); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if err = doPreUpgrade(l.cfg); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
|
@ -152,32 +152,52 @@ func doBackup(cfg *Config) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// doPreUpgrade runs the pre-upgrade command defined by the application
|
||||
// doPreUpgrade runs the pre-upgrade command defined by the application and handles respective error codes
|
||||
// cfg contains the cosmovisor config from env var
|
||||
func doPreUpgrade(cfg *Config) error {
|
||||
bin, err := cfg.CurrentBin()
|
||||
preUpgradeCmd := exec.Command(bin, "pre-upgrade")
|
||||
counter := 0
|
||||
for {
|
||||
if counter > cfg.PreupgradeMaxRetries {
|
||||
return fmt.Errorf("pre-upgrade command failed. reached max attempt of retries - %d", cfg.PreupgradeMaxRetries)
|
||||
}
|
||||
|
||||
_, err = preUpgradeCmd.Output()
|
||||
err := executePreUpgradeCmd(cfg)
|
||||
counter += 1
|
||||
|
||||
if err != nil {
|
||||
if err.(*exec.ExitError).ProcessState.ExitCode() == 1 {
|
||||
fmt.Println("pre-upgrade command does not exist. continuing the upgrade.")
|
||||
return nil
|
||||
}
|
||||
if err.(*exec.ExitError).ProcessState.ExitCode() == 30 {
|
||||
return fmt.Errorf("pre-upgrade command failed : %w", err)
|
||||
}
|
||||
if err.(*exec.ExitError).ProcessState.ExitCode() == 31 {
|
||||
fmt.Println("pre-upgrade command failed. retrying.")
|
||||
return doPreUpgrade(cfg)
|
||||
if err != nil {
|
||||
if err.(*exec.ExitError).ProcessState.ExitCode() == 1 {
|
||||
fmt.Println("pre-upgrade command does not exist. continuing the upgrade.")
|
||||
return nil
|
||||
}
|
||||
if err.(*exec.ExitError).ProcessState.ExitCode() == 30 {
|
||||
return fmt.Errorf("pre-upgrade command failed : %w", err)
|
||||
}
|
||||
if err.(*exec.ExitError).ProcessState.ExitCode() == 31 {
|
||||
fmt.Println("pre-upgrade command failed. retrying. attempt:", counter)
|
||||
fmt.Println(err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
fmt.Println("pre-upgrade successful. continuing the upgrade.")
|
||||
return nil
|
||||
}
|
||||
fmt.Println("pre-upgrade successful. continuing the upgrade.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// skipUpgrade checks if pre-upgrade script must be run. If the height in the upgrade plan matches any of the heights provided in --safe-skip-upgrade, the script is not run
|
||||
func SkipUpgrade(args []string, upgradeInfo UpgradeInfo) bool {
|
||||
// executePreUpgradeCmd runs the pre-upgrade command defined by the application
|
||||
// cfg contains the cosmosvisor config from the env vars
|
||||
func executePreUpgradeCmd(cfg *Config) error {
|
||||
bin, err := cfg.CurrentBin()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
preUpgradeCmd := exec.Command(bin, "pre-upgrade")
|
||||
_, err = preUpgradeCmd.Output()
|
||||
return err
|
||||
}
|
||||
|
||||
// IsSkipUpgradeHeight checks if pre-upgrade script must be run. If the height in the upgrade plan matches any of the heights provided in --safe-skip-upgrade, the script is not run
|
||||
func IsSkipUpgradeHeight(args []string, upgradeInfo UpgradeInfo) bool {
|
||||
skipUpgradeHeights := UpgradeSkipHeights(args)
|
||||
for _, h := range skipUpgradeHeights {
|
||||
if h == int(upgradeInfo.Height) {
|
||||
|
|
|
@ -160,7 +160,7 @@ func TestSkipUpgrade(t *testing.T) {
|
|||
for i := range cases {
|
||||
tc := cases[i]
|
||||
require := require.New(t)
|
||||
h := cosmovisor.SkipUpgrade(tc.args, tc.upgradeInfo)
|
||||
h := cosmovisor.IsSkipUpgradeHeight(tc.args, tc.upgradeInfo)
|
||||
require.Equal(h, tc.expectRes)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue