2020-08-19 08:58:43 -07:00
package cosmovisor
import (
2021-08-05 13:00:08 -07:00
"encoding/json"
2020-09-11 02:42:11 -07:00
"fmt"
2020-08-19 08:58:43 -07:00
"io"
2020-11-13 06:54:30 -08:00
"os"
2020-08-19 08:58:43 -07:00
"os/exec"
2020-11-13 06:54:30 -08:00
"os/signal"
2021-08-05 13:00:08 -07:00
"path/filepath"
2021-09-07 00:18:58 -07:00
"strconv"
2020-08-19 08:58:43 -07:00
"strings"
2020-11-13 06:54:30 -08:00
"syscall"
2021-08-05 13:00:08 -07:00
"time"
"github.com/otiai10/copy"
2021-11-09 06:15:43 -08:00
upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types"
2020-08-19 08:58:43 -07:00
)
2021-08-11 08:03:48 -07:00
type Launcher struct {
cfg * Config
fw * fileWatcher
}
func NewLauncher ( cfg * Config ) ( Launcher , error ) {
fw , err := newUpgradeFileWatcher ( cfg . UpgradeInfoFilePath ( ) , cfg . PollInterval )
return Launcher { cfg , fw } , err
}
// Run launches the app in a subprocess and returns when the subprocess (app)
// exits (either when it dies, or *after* a successful upgrade.) and upgrade finished.
// Returns true if the upgrade request was detected and the upgrade process started.
func ( l Launcher ) Run ( args [ ] string , stdout , stderr io . Writer ) ( bool , error ) {
bin , err := l . cfg . CurrentBin ( )
2020-08-19 08:58:43 -07:00
if err != nil {
2020-09-11 02:42:11 -07:00
return false , fmt . Errorf ( "error creating symlink to genesis: %w" , err )
2020-08-19 08:58:43 -07:00
}
2020-09-11 02:42:11 -07:00
if err := EnsureBinary ( bin ) ; err != nil {
2021-08-11 08:03:48 -07:00
return false , fmt . Errorf ( "current binary is invalid: %w" , err )
2020-08-19 08:58:43 -07:00
}
2021-09-28 03:26:54 -07:00
Logger . Info ( ) . Str ( "path" , bin ) . Strs ( "args" , args ) . Msg ( "running app" )
2020-08-19 08:58:43 -07:00
cmd := exec . Command ( bin , args ... )
2021-08-11 08:03:48 -07:00
cmd . Stdout = stdout
cmd . Stderr = stderr
2020-09-11 02:42:11 -07:00
if err := cmd . Start ( ) ; err != nil {
2021-08-11 08:03:48 -07:00
return false , fmt . Errorf ( "launching process %s %s failed: %w" , bin , strings . Join ( args , " " ) , err )
2020-08-19 08:58:43 -07:00
}
2020-11-13 06:54:30 -08:00
sigs := make ( chan os . Signal , 1 )
signal . Notify ( sigs , syscall . SIGQUIT , syscall . SIGTERM )
go func ( ) {
sig := <- sigs
if err := cmd . Process . Signal ( sig ) ; err != nil {
2021-09-28 03:26:54 -07:00
Logger . Fatal ( ) . Err ( err ) . Str ( "bin" , bin ) . Msg ( "terminated" )
2020-11-13 06:54:30 -08:00
}
} ( )
2021-08-11 08:03:48 -07:00
needsUpdate , err := l . WaitForUpgradeOrExit ( cmd )
if err != nil || ! needsUpdate {
2020-08-19 08:58:43 -07:00
return false , err
}
2021-08-11 08:03:48 -07:00
2021-09-23 08:31:10 -07:00
if ! IsSkipUpgradeHeight ( args , l . fw . currentInfo ) {
if err := doBackup ( l . cfg ) ; err != nil {
return false , err
}
if err = doPreUpgrade ( l . cfg ) ; err != nil {
2021-09-07 00:18:58 -07:00
return false , err
}
}
2021-08-11 08:03:48 -07:00
return true , DoUpgrade ( l . cfg , l . fw . currentInfo )
}
// WaitForUpgradeOrExit checks upgrade plan file created by the app.
// When it returns, the process (app) is finished.
//
// It returns (true, nil) if an upgrade should be initiated (and we killed the process)
// It returns (false, err) if the process died by itself, or there was an issue reading the upgrade-info file.
// It returns (false, nil) if the process exited normally without triggering an upgrade. This is very unlikely
// to happened with "start" but may happened with short-lived commands like `gaiad export ...`
func ( l Launcher ) WaitForUpgradeOrExit ( cmd * exec . Cmd ) ( bool , error ) {
currentUpgrade := l . cfg . UpgradeInfo ( )
var cmdDone = make ( chan error )
go func ( ) {
cmdDone <- cmd . Wait ( )
} ( )
2020-09-11 02:42:11 -07:00
2021-08-11 08:03:48 -07:00
select {
case <- l . fw . MonitorUpdate ( currentUpgrade ) :
// upgrade - kill the process and restart
2021-09-28 03:26:54 -07:00
Logger . Info ( ) . Msg ( "Daemon shutting down in an attempt to restart" )
2021-08-11 08:03:48 -07:00
_ = cmd . Process . Kill ( )
case err := <- cmdDone :
l . fw . Stop ( )
// no error -> command exits normally (eg. short command like `gaiad version`)
if err == nil {
return false , nil
}
// the app x/upgrade causes a panic and the app can die before the filwatcher finds the
// update, so we need to recheck update-info file.
if ! l . fw . CheckUpdate ( currentUpgrade ) {
2021-08-05 13:00:08 -07:00
return false , err
}
2020-08-19 08:58:43 -07:00
}
2021-08-11 08:03:48 -07:00
return true , nil
2020-08-19 08:58:43 -07:00
}
2021-08-05 13:00:08 -07:00
func doBackup ( cfg * Config ) error {
// take backup if `UNSAFE_SKIP_BACKUP` is not set.
if ! cfg . UnsafeSkipBackup {
// check if upgrade-info.json is not empty.
2021-11-09 06:15:43 -08:00
var uInfo upgradetypes . Plan
2021-10-13 00:38:22 -07:00
upgradeInfoFile , err := os . ReadFile ( filepath . Join ( cfg . Home , "data" , "upgrade-info.json" ) )
2021-08-05 13:00:08 -07:00
if err != nil {
return fmt . Errorf ( "error while reading upgrade-info.json: %w" , err )
}
err = json . Unmarshal ( upgradeInfoFile , & uInfo )
if err != nil {
return err
}
if uInfo . Name == "" {
return fmt . Errorf ( "upgrade-info.json is empty" )
}
// a destination directory, Format YYYY-MM-DD
st := time . Now ( )
stStr := fmt . Sprintf ( "%d-%d-%d" , st . Year ( ) , st . Month ( ) , st . Day ( ) )
2022-01-04 06:25:45 -08:00
dst := filepath . Join ( cfg . DataBackupPath , fmt . Sprintf ( "data" + "-backup-%s" , stStr ) )
2021-08-05 13:00:08 -07:00
2021-09-28 03:26:54 -07:00
Logger . Info ( ) . Time ( "backup start time" , st ) . Msg ( "starting to take backup of data directory" )
2021-08-05 13:00:08 -07:00
// copy the $DAEMON_HOME/data to a backup dir
err = copy . Copy ( filepath . Join ( cfg . Home , "data" ) , dst )
if err != nil {
return fmt . Errorf ( "error while taking data backup: %w" , err )
}
// backup is done, lets check endtime to calculate total time taken for backup process
et := time . Now ( )
2021-09-28 03:26:54 -07:00
Logger . Info ( ) . Str ( "backup saved at" , dst ) . Time ( "backup completion time" , et ) . TimeDiff ( "time taken to complete backup" , et , st ) . Msg ( "backup completed" )
2021-08-05 13:00:08 -07:00
}
return nil
}
2021-09-07 00:18:58 -07:00
2021-09-23 08:31:10 -07:00
// doPreUpgrade runs the pre-upgrade command defined by the application and handles respective error codes
// cfg contains the cosmovisor config from env var
2021-09-07 00:18:58 -07:00
func doPreUpgrade ( cfg * Config ) error {
2021-09-23 08:31:10 -07:00
counter := 0
for {
if counter > cfg . PreupgradeMaxRetries {
return fmt . Errorf ( "pre-upgrade command failed. reached max attempt of retries - %d" , cfg . PreupgradeMaxRetries )
}
2021-09-07 00:18:58 -07:00
2021-09-23 08:31:10 -07:00
err := executePreUpgradeCmd ( cfg )
counter += 1
2021-09-07 00:18:58 -07:00
2021-09-23 08:31:10 -07:00
if err != nil {
if err . ( * exec . ExitError ) . ProcessState . ExitCode ( ) == 1 {
2021-09-28 03:26:54 -07:00
Logger . Info ( ) . Msg ( "pre-upgrade command does not exist. continuing the upgrade." )
2021-09-23 08:31:10 -07:00
return nil
}
if err . ( * exec . ExitError ) . ProcessState . ExitCode ( ) == 30 {
return fmt . Errorf ( "pre-upgrade command failed : %w" , err )
}
if err . ( * exec . ExitError ) . ProcessState . ExitCode ( ) == 31 {
2021-09-28 03:26:54 -07:00
Logger . Error ( ) . Err ( err ) . Int ( "attempt" , counter ) . Msg ( "pre-upgrade command failed. retrying" )
2021-09-23 08:31:10 -07:00
continue
}
2021-09-07 00:18:58 -07:00
}
2021-09-23 08:31:10 -07:00
fmt . Println ( "pre-upgrade successful. continuing the upgrade." )
return nil
2021-09-07 00:18:58 -07:00
}
}
2021-09-23 08:31:10 -07:00
// executePreUpgradeCmd runs the pre-upgrade command defined by the application
// cfg contains the cosmosvisor config from the env vars
func executePreUpgradeCmd ( cfg * Config ) error {
bin , err := cfg . CurrentBin ( )
if err != nil {
return err
}
preUpgradeCmd := exec . Command ( bin , "pre-upgrade" )
_ , err = preUpgradeCmd . Output ( )
return err
}
// IsSkipUpgradeHeight checks if pre-upgrade script must be run. If the height in the upgrade plan matches any of the heights provided in --safe-skip-upgrade, the script is not run
2021-11-09 06:15:43 -08:00
func IsSkipUpgradeHeight ( args [ ] string , upgradeInfo upgradetypes . Plan ) bool {
2021-09-07 00:18:58 -07:00
skipUpgradeHeights := UpgradeSkipHeights ( args )
for _ , h := range skipUpgradeHeights {
if h == int ( upgradeInfo . Height ) {
return true
}
}
return false
}
// UpgradeSkipHeights gets all the heights provided when
// simd start --unsafe-skip-upgrades <height1> <optional_height_2> ... <optional_height_N>
func UpgradeSkipHeights ( args [ ] string ) [ ] int {
var heights [ ] int
for i , arg := range args {
if arg == "--unsafe-skip-upgrades" {
j := i + 1
for j < len ( args ) {
tArg := args [ j ]
if strings . HasPrefix ( tArg , "-" ) {
break
}
h , err := strconv . Atoi ( tArg )
if err == nil {
heights = append ( heights , h )
}
j ++
}
break
}
}
return heights
}