diff --git a/node/pkg/common/scissors.go b/node/pkg/common/scissors.go index 7b43542e7..8ea40e68b 100644 --- a/node/pkg/common/scissors.go +++ b/node/pkg/common/scissors.go @@ -10,38 +10,56 @@ import ( ) var ( - ScissorsErrors = promauto.NewCounterVec( + ScissorsErrorsCaught = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "scissor_errors_caught", Help: "Total number of unhandled errors caught", - }, []string{"scissors", "name"}) + }, []string{"name"}) + ScissorsPanicsCaught = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "scissor_panics_caught", + Help: "Total number of panics caught", + }, []string{"name"}) ) // Start a go routine with recovering from any panic by sending an error to a error channel func RunWithScissors(ctx context.Context, errC chan error, name string, runnable supervisor.Runnable) { - ScissorsErrors.WithLabelValues("scissors", name).Add(0) + ScissorsErrorsCaught.WithLabelValues(name).Add(0) + ScissorsPanicsCaught.WithLabelValues(name).Add(0) go func() { defer func() { if r := recover(); r != nil { + var err error switch x := r.(type) { case error: - errC <- fmt.Errorf("%s: %w", name, x) + err = fmt.Errorf("%s: %w", name, x) default: - errC <- fmt.Errorf("%s: %v", name, x) + err = fmt.Errorf("%s: %v", name, x) } - ScissorsErrors.WithLabelValues("scissors", name).Inc() + // We don't want this to hang if the listener has already gone away. + select { + case errC <- err: + default: + } + ScissorsPanicsCaught.WithLabelValues(name).Inc() } }() err := runnable(ctx) if err != nil { - errC <- err + // We don't want this to hang if the listener has already gone away. + select { + case errC <- err: + default: + } + ScissorsErrorsCaught.WithLabelValues(name).Inc() } }() } func WrapWithScissors(runnable supervisor.Runnable, name string) supervisor.Runnable { - ScissorsErrors.WithLabelValues("scissors", name).Add(0) + ScissorsErrorsCaught.WithLabelValues(name).Add(0) + ScissorsPanicsCaught.WithLabelValues(name).Add(0) return func(ctx context.Context) (result error) { defer func() { if r := recover(); r != nil { @@ -51,7 +69,7 @@ func WrapWithScissors(runnable supervisor.Runnable, name string) supervisor.Runn default: result = fmt.Errorf("%s: %v", name, x) } - ScissorsErrors.WithLabelValues("scissors", name).Inc() + ScissorsPanicsCaught.WithLabelValues(name).Inc() } }() diff --git a/node/pkg/common/scissors_test.go b/node/pkg/common/scissors_test.go index b19c198cb..94a062d07 100644 --- a/node/pkg/common/scissors_test.go +++ b/node/pkg/common/scissors_test.go @@ -5,10 +5,23 @@ import ( "errors" "fmt" "testing" + "time" "github.com/stretchr/testify/assert" + "github.com/test-go/testify/require" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" ) +func getCounterValue(metric *prometheus.CounterVec, runnableName string) float64 { + var m = &dto.Metric{} + if err := metric.WithLabelValues(runnableName).Write(m); err != nil { + return 0 + } + return m.Counter.GetValue() +} + func throwNil(ctx context.Context) error { var x *int = nil *x = 5 @@ -78,3 +91,115 @@ func TestSupervisor(t *testing.T) { ) } } + +func TestRunWithScissorsCleanExit(t *testing.T) { + ctx := context.Background() + errC := make(chan error) + + itRan := make(chan bool, 1) + RunWithScissors(ctx, errC, "TestRunWithScissorsCleanExit", func(ctx context.Context) error { + itRan <- true + return nil + }) + + shouldHaveRun := <-itRan + require.Equal(t, true, shouldHaveRun) + + // Need to wait a bit to make sure the scissors code completes without hanging. + time.Sleep(100 * time.Millisecond) + + assert.Equal(t, 0.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsCleanExit")) + assert.Equal(t, 0.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsCleanExit")) +} + +func TestRunWithScissorsPanicReturned(t *testing.T) { + ctx := context.Background() + errC := make(chan error) + + itRan := make(chan bool, 1) + RunWithScissors(ctx, errC, "TestRunWithScissorsPanicReturned", func(ctx context.Context) error { + itRan <- true + panic("Some random panic") + }) + + var err error + select { + case <-ctx.Done(): + break + case err = <-errC: + break + } + + shouldHaveRun := <-itRan + require.Equal(t, true, shouldHaveRun) + assert.Error(t, err) + assert.Equal(t, "TestRunWithScissorsPanicReturned: Some random panic", err.Error()) + assert.Equal(t, 0.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsPanicReturned")) + assert.Equal(t, 1.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsPanicReturned")) +} + +func TestRunWithScissorsPanicDoesNotBlockWhenNoListener(t *testing.T) { + ctx := context.Background() + errC := make(chan error) + + itRan := make(chan bool, 1) + RunWithScissors(ctx, errC, "TestRunWithScissorsPanicDoesNotBlockWhenNoListener", func(ctx context.Context) error { + itRan <- true + panic("Some random panic") + }) + + shouldHaveRun := <-itRan + require.Equal(t, true, shouldHaveRun) + + // Need to wait a bit to make sure the scissors code completes without hanging. + time.Sleep(100 * time.Millisecond) + + assert.Equal(t, 0.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsPanicDoesNotBlockWhenNoListener")) + assert.Equal(t, 1.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsPanicDoesNotBlockWhenNoListener")) +} + +func TestRunWithScissorsErrorReturned(t *testing.T) { + ctx := context.Background() + errC := make(chan error) + + itRan := make(chan bool, 1) + RunWithScissors(ctx, errC, "TestRunWithScissorsErrorReturned", func(ctx context.Context) error { + itRan <- true + return fmt.Errorf("Some random error") + }) + + var err error + select { + case <-ctx.Done(): + break + case err = <-errC: + break + } + + shouldHaveRun := <-itRan + require.Equal(t, true, shouldHaveRun) + assert.Error(t, err) + assert.Equal(t, "Some random error", err.Error()) + assert.Equal(t, 1.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsErrorReturned")) + assert.Equal(t, 0.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsErrorReturned")) +} + +func TestRunWithScissorsErrorDoesNotBlockWhenNoListener(t *testing.T) { + ctx := context.Background() + errC := make(chan error) + + itRan := make(chan bool, 1) + RunWithScissors(ctx, errC, "TestRunWithScissorsErrorDoesNotBlockWhenNoListener", func(ctx context.Context) error { + itRan <- true + return fmt.Errorf("Some random error") + }) + + shouldHaveRun := <-itRan + require.Equal(t, true, shouldHaveRun) + + // Need to wait a bit to make sure the scissors code completes without hanging. + time.Sleep(100 * time.Millisecond) + + assert.Equal(t, 1.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsErrorDoesNotBlockWhenNoListener")) + assert.Equal(t, 0.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsErrorDoesNotBlockWhenNoListener")) +}