feat: Add backup option for cosmovisor (#9652)
<!-- The default pull request template is for types feat, fix, or refactor. For other templates, add one of the following parameters to the url: - template=docs.md - template=other.md --> ## Description Ref: https://github.com/cosmos/cosmos-sdk/issues/9616#issuecomment-873051972 depends: #8590 <!-- Add a description of the changes that this PR introduces and the files that are the most critical to review. --> This PR adds a full backup option for cosmovisor. `UNSAFE_SKIP_BACKUP` is an `env` setting introduced newly. - if `false` (default, **recommended**), cosmovisor will try to take backup and then upgrade. In case of failure while taking backup, it will just halt the process there and won't try the upgrade. - If `true`, the cosmovisor will try to upgrade without any backup. This setting makes it hard to recover from a failed upgrade. Node operators either need to sync from a healthy node or use a snapshot from others. --- ### Author Checklist *All items are required. Please add a note to the item if the item is not applicable and please add links to any relevant follow up issues.* I have... 
- [x] included the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [ ] added `!` to the type prefix if API or client breaking change - [ ] targeted the correct branch (see [PR Targeting](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#pr-targeting)) - [x] provided a link to the relevant issue or specification - [ ] followed the guidelines for [building modules](https://github.com/cosmos/cosmos-sdk/blob/master/docs/building-modules) - [ ] included the necessary unit and integration [tests](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#testing) - [ ] added a changelog entry to `CHANGELOG.md` - [x] included comments for [documenting Go code](https://blog.golang.org/godoc) - [x] updated the relevant documentation or specification - [x] reviewed "Files changed" and left comments if necessary - [ ] confirmed all CI checks have passed ### Reviewers Checklist *All items are required. Please add a note if the item is not applicable and please add your handle next to the items reviewed if you only reviewed selected items.* I have... - [ ] confirmed the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [ ] confirmed `!` in the type prefix if API or client breaking change - [ ] confirmed all author checklist items have been addressed - [ ] reviewed state machine logic - [ ] reviewed API design and naming - [ ] reviewed documentation is accurate - [ ] reviewed tests and test coverage - [ ] manually tested (if applicable)
This commit is contained in:
parent
eb79dd022f
commit
5a47154f6c
|
@ -22,6 +22,7 @@ All arguments passed to `cosmovisor` will be passed to the application binary (a
|
|||
* `DAEMON_NAME` is the name of the binary itself (e.g. `gaiad`, `regend`, `simd`, etc.).
|
||||
* `DAEMON_ALLOW_DOWNLOAD_BINARIES` (*optional*), if set to `true`, will enable auto-downloading of new binaries (for security reasons, this is intended for full nodes rather than validators). By default, `cosmovisor` will not auto-download new binaries.
|
||||
* `DAEMON_RESTART_AFTER_UPGRADE` (*optional*), if set to `true`, will restart the subprocess with the same command-line arguments and flags (but with the new binary) after a successful upgrade. By default, `cosmovisor` stops running after an upgrade and requires the system administrator to manually restart it. Note that `cosmovisor` will not auto-restart the subprocess if there was an error.
|
||||
* `UNSAFE_SKIP_BACKUP` (defaults to `false`), if set to `false`, will back up the data directory before attempting the upgrade. If set to `true`, the upgrade proceeds directly without any backup. Taking a backup is useful (and recommended) because it makes it possible to roll back after a failed upgrade. It is advised to keep the backup enabled, i.e., `UNSAFE_SKIP_BACKUP=false`.
|
||||
|
||||
## Folder Layout
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ type Config struct {
|
|||
AllowDownloadBinaries bool
|
||||
RestartAfterUpgrade bool
|
||||
LogBufferSize int
|
||||
UnsafeSkipBackup bool
|
||||
}
|
||||
|
||||
// Root returns the root directory where all info lives
|
||||
|
@ -113,6 +114,8 @@ func GetConfigFromEnv() (*Config, error) {
|
|||
cfg.LogBufferSize = bufio.MaxScanTokenSize
|
||||
}
|
||||
|
||||
cfg.UnsafeSkipBackup = os.Getenv("UNSAFE_SKIP_BACKUP") == "true"
|
||||
|
||||
if err := cfg.validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ func Run(args []string) error {
|
|||
}
|
||||
|
||||
doUpgrade, err := cosmovisor.LaunchProcess(cfg, args, os.Stdout, os.Stderr)
|
||||
|
||||
// if RestartAfterUpgrade, we launch after a successful upgrade (only condition LaunchProcess returns nil)
|
||||
for cfg.RestartAfterUpgrade && err == nil && doUpgrade {
|
||||
doUpgrade, err = cosmovisor.LaunchProcess(cfg, args, os.Stdout, os.Stderr)
|
||||
|
|
|
@ -2,15 +2,21 @@ package cosmovisor
|
|||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/otiai10/copy"
|
||||
)
|
||||
|
||||
// LaunchProcess runs a subprocess and returns when the subprocess exits,
|
||||
|
@ -70,12 +76,59 @@ func LaunchProcess(cfg *Config, args []string, stdout, stderr io.Writer) (bool,
|
|||
}
|
||||
|
||||
if upgradeInfo != nil {
|
||||
if err := doBackup(cfg); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return true, DoUpgrade(cfg, upgradeInfo)
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func doBackup(cfg *Config) error {
|
||||
// take backup if `UNSAFE_SKIP_BACKUP` is not set.
|
||||
if !cfg.UnsafeSkipBackup {
|
||||
// check if upgrade-info.json is not empty.
|
||||
var uInfo UpgradeInfo
|
||||
upgradeInfoFile, err := ioutil.ReadFile(filepath.Join(cfg.Home, "data", "upgrade-info.json"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while reading upgrade-info.json: %w", err)
|
||||
}
|
||||
|
||||
err = json.Unmarshal(upgradeInfoFile, &uInfo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if uInfo.Name == "" {
|
||||
return fmt.Errorf("upgrade-info.json is empty")
|
||||
}
|
||||
|
||||
// a destination directory, Format YYYY-MM-DD
|
||||
st := time.Now()
|
||||
stStr := fmt.Sprintf("%d-%d-%d", st.Year(), st.Month(), st.Day())
|
||||
dst := filepath.Join(cfg.Home, fmt.Sprintf("data"+"-backup-%s", stStr))
|
||||
|
||||
fmt.Printf("starting to take backup of data directory at time %s", st)
|
||||
|
||||
// copy the $DAEMON_HOME/data to a backup dir
|
||||
err = copy.Copy(filepath.Join(cfg.Home, "data"), dst)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while taking data backup: %w", err)
|
||||
}
|
||||
|
||||
// backup is done, lets check endtime to calculate total time taken for backup process
|
||||
et := time.Now()
|
||||
timeTaken := et.Sub(st)
|
||||
fmt.Printf("backup saved at location: %s, completed at time: %s\n"+
|
||||
"time taken to complete the backup: %s", dst, et, timeTaken)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// WaitResult is used to wrap feedback on cmd state with some mutex logic.
|
||||
// This is needed as multiple go-routines can affect this - two read pipes that can trigger upgrade
|
||||
// As well as the command, which can fail
|
||||
|
|
|
@ -23,7 +23,7 @@ func TestProcessTestSuite(t *testing.T) {
|
|||
// and args are passed through
|
||||
func (s *processTestSuite) TestLaunchProcess() {
|
||||
home := copyTestData(s.T(), "validate")
|
||||
cfg := &cosmovisor.Config{Home: home, Name: "dummyd"}
|
||||
cfg := &cosmovisor.Config{Home: home, Name: "dummyd", UnsafeSkipBackup: true}
|
||||
|
||||
// should run the genesis binary and produce expected output
|
||||
var stdout, stderr bytes.Buffer
|
||||
|
@ -65,7 +65,7 @@ func (s *processTestSuite) TestLaunchProcessWithDownloads() {
|
|||
// zip_binary -> "chain3" = ref_zipped -> zip_directory
|
||||
// zip_directory no upgrade
|
||||
home := copyTestData(s.T(), "download")
|
||||
cfg := &cosmovisor.Config{Home: home, Name: "autod", AllowDownloadBinaries: true}
|
||||
cfg := &cosmovisor.Config{Home: home, Name: "autod", AllowDownloadBinaries: true, UnsafeSkipBackup: true}
|
||||
|
||||
// should run the genesis binary and produce expected output
|
||||
var stdout, stderr bytes.Buffer
|
||||
|
|
Loading…
Reference in New Issue