Node: RunWithScissors should not hang on error if no listener (#2649)

* Node: RunwithScissors should not hang * Tests should check error text * Fix data race in tests * Add comments to explain the tests.
2023-04-18 09:05:20 -05:00 · 2023-04-18 09:05:20 -05:00 · c8e18ba72c
parent 8a866c3c1d
commit c8e18ba72c
2 changed files with 152 additions and 9 deletions
--- a/node/pkg/common/scissors.go
+++ b/node/pkg/common/scissors.go
@ -10,38 +10,56 @@ import (
 )
 var (
-	ScissorsErrors = promauto.NewCounterVec(
+	ScissorsErrorsCaught = promauto.NewCounterVec(
 		prometheus.CounterOpts{
 			Name: "scissor_errors_caught",
 			Help: "Total number of unhandled errors caught",
-		}, []string{"scissors", "name"})
+		}, []string{"name"})
 	ScissorsPanicsCaught = promauto.NewCounterVec(
 		prometheus.CounterOpts{
 			Name: "scissor_panics_caught",
 			Help: "Total number of panics caught",
 		}, []string{"name"})
 )
 // Start a go routine with recovering from any panic by sending an error to a error channel
 func RunWithScissors(ctx context.Context, errC chan error, name string, runnable supervisor.Runnable) {
-	ScissorsErrors.WithLabelValues("scissors", name).Add(0)
+	ScissorsErrorsCaught.WithLabelValues(name).Add(0)
 	ScissorsPanicsCaught.WithLabelValues(name).Add(0)
 	go func() {
 		defer func() {
 			if r := recover(); r != nil {
 				var err error
 				switch x := r.(type) {
 				case error:
-					errC <- fmt.Errorf("%s: %w", name, x)
+					err = fmt.Errorf("%s: %w", name, x)
 				default:
-					errC <- fmt.Errorf("%s: %v", name, x)
+					err = fmt.Errorf("%s: %v", name, x)
 				}
-				ScissorsErrors.WithLabelValues("scissors", name).Inc()
+				// We don't want this to hang if the listener has already gone away.
 				select {
 				case errC <- err:
 				default:
 				}
 				ScissorsPanicsCaught.WithLabelValues(name).Inc()
 			}
 		}()
 		err := runnable(ctx)
 		if err != nil {
-			errC <- err
+			// We don't want this to hang if the listener has already gone away.
 			select {
 			case errC <- err:
 			default:
 			}
 			ScissorsErrorsCaught.WithLabelValues(name).Inc()
 		}
 	}()
 }
 func WrapWithScissors(runnable supervisor.Runnable, name string) supervisor.Runnable {
-	ScissorsErrors.WithLabelValues("scissors", name).Add(0)
+	ScissorsErrorsCaught.WithLabelValues(name).Add(0)
 	ScissorsPanicsCaught.WithLabelValues(name).Add(0)
 	return func(ctx context.Context) (result error) {
 		defer func() {
 			if r := recover(); r != nil {
@ -51,7 +69,7 @@ func WrapWithScissors(runnable supervisor.Runnable, name string) supervisor.Runn
 				default:
 					result = fmt.Errorf("%s: %v", name, x)
 				}
-				ScissorsErrors.WithLabelValues("scissors", name).Inc()
+				ScissorsPanicsCaught.WithLabelValues(name).Inc()
 			}
 		}()
--- a/node/pkg/common/scissors_test.go
+++ b/node/pkg/common/scissors_test.go
@ -5,10 +5,23 @@ import (
 	"errors"
 	"fmt"
 	"testing"
 	"time"
 	"github.com/stretchr/testify/assert"
 	"github.com/test-go/testify/require"
 	"github.com/prometheus/client_golang/prometheus"
 	dto "github.com/prometheus/client_model/go"
 )
 func getCounterValue(metric *prometheus.CounterVec, runnableName string) float64 {
 	var m = &dto.Metric{}
 	if err := metric.WithLabelValues(runnableName).Write(m); err != nil {
 		return 0
 	}
 	return m.Counter.GetValue()
 }
 func throwNil(ctx context.Context) error {
 	var x *int = nil
 	*x = 5
@ -78,3 +91,115 @@ func TestSupervisor(t *testing.T) {
 		)
 	}
 }
 func TestRunWithScissorsCleanExit(t *testing.T) {
 	ctx := context.Background()
 	errC := make(chan error)
 	itRan := make(chan bool, 1)
 	RunWithScissors(ctx, errC, "TestRunWithScissorsCleanExit", func(ctx context.Context) error {
 		itRan <- true
 		return nil
 	})
 	shouldHaveRun := <-itRan
 	require.Equal(t, true, shouldHaveRun)
 	// Need to wait a bit to make sure the scissors code completes without hanging.
 	time.Sleep(100 * time.Millisecond)
 	assert.Equal(t, 0.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsCleanExit"))
 	assert.Equal(t, 0.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsCleanExit"))
 }
 func TestRunWithScissorsPanicReturned(t *testing.T) {
 	ctx := context.Background()
 	errC := make(chan error)
 	itRan := make(chan bool, 1)
 	RunWithScissors(ctx, errC, "TestRunWithScissorsPanicReturned", func(ctx context.Context) error {
 		itRan <- true
 		panic("Some random panic")
 	})
 	var err error
 	select {
 	case <-ctx.Done():
 		break
 	case err = <-errC:
 		break
 	}
 	shouldHaveRun := <-itRan
 	require.Equal(t, true, shouldHaveRun)
 	assert.Error(t, err)
 	assert.Equal(t, "TestRunWithScissorsPanicReturned: Some random panic", err.Error())
 	assert.Equal(t, 0.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsPanicReturned"))
 	assert.Equal(t, 1.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsPanicReturned"))
 }
 func TestRunWithScissorsPanicDoesNotBlockWhenNoListener(t *testing.T) {
 	ctx := context.Background()
 	errC := make(chan error)
 	itRan := make(chan bool, 1)
 	RunWithScissors(ctx, errC, "TestRunWithScissorsPanicDoesNotBlockWhenNoListener", func(ctx context.Context) error {
 		itRan <- true
 		panic("Some random panic")
 	})
 	shouldHaveRun := <-itRan
 	require.Equal(t, true, shouldHaveRun)
 	// Need to wait a bit to make sure the scissors code completes without hanging.
 	time.Sleep(100 * time.Millisecond)
 	assert.Equal(t, 0.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsPanicDoesNotBlockWhenNoListener"))
 	assert.Equal(t, 1.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsPanicDoesNotBlockWhenNoListener"))
 }
 func TestRunWithScissorsErrorReturned(t *testing.T) {
 	ctx := context.Background()
 	errC := make(chan error)
 	itRan := make(chan bool, 1)
 	RunWithScissors(ctx, errC, "TestRunWithScissorsErrorReturned", func(ctx context.Context) error {
 		itRan <- true
 		return fmt.Errorf("Some random error")
 	})
 	var err error
 	select {
 	case <-ctx.Done():
 		break
 	case err = <-errC:
 		break
 	}
 	shouldHaveRun := <-itRan
 	require.Equal(t, true, shouldHaveRun)
 	assert.Error(t, err)
 	assert.Equal(t, "Some random error", err.Error())
 	assert.Equal(t, 1.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsErrorReturned"))
 	assert.Equal(t, 0.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsErrorReturned"))
 }
 func TestRunWithScissorsErrorDoesNotBlockWhenNoListener(t *testing.T) {
 	ctx := context.Background()
 	errC := make(chan error)
 	itRan := make(chan bool, 1)
 	RunWithScissors(ctx, errC, "TestRunWithScissorsErrorDoesNotBlockWhenNoListener", func(ctx context.Context) error {
 		itRan <- true
 		return fmt.Errorf("Some random error")
 	})
 	shouldHaveRun := <-itRan
 	require.Equal(t, true, shouldHaveRun)
 	// Need to wait a bit to make sure the scissors code completes without hanging.
 	time.Sleep(100 * time.Millisecond)
 	assert.Equal(t, 1.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsErrorDoesNotBlockWhenNoListener"))
 	assert.Equal(t, 0.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsErrorDoesNotBlockWhenNoListener"))
 }