Node: RunWithScissors should not hang on error if no listener (#2649)

* Node: RunwithScissors should not hang

* Tests should check error text

* Fix data race in tests

* Add comments to explain the tests.
This commit is contained in:
bruce-riley 2023-04-18 09:05:20 -05:00 committed by GitHub
parent 8a866c3c1d
commit c8e18ba72c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 152 additions and 9 deletions

View File

@ -10,38 +10,56 @@ import (
)
var (
ScissorsErrors = promauto.NewCounterVec(
ScissorsErrorsCaught = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "scissor_errors_caught",
Help: "Total number of unhandled errors caught",
}, []string{"scissors", "name"})
}, []string{"name"})
ScissorsPanicsCaught = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "scissor_panics_caught",
Help: "Total number of panics caught",
}, []string{"name"})
)
// Start a go routine with recovering from any panic by sending an error to a error channel
func RunWithScissors(ctx context.Context, errC chan error, name string, runnable supervisor.Runnable) {
ScissorsErrors.WithLabelValues("scissors", name).Add(0)
ScissorsErrorsCaught.WithLabelValues(name).Add(0)
ScissorsPanicsCaught.WithLabelValues(name).Add(0)
go func() {
defer func() {
if r := recover(); r != nil {
var err error
switch x := r.(type) {
case error:
errC <- fmt.Errorf("%s: %w", name, x)
err = fmt.Errorf("%s: %w", name, x)
default:
errC <- fmt.Errorf("%s: %v", name, x)
err = fmt.Errorf("%s: %v", name, x)
}
ScissorsErrors.WithLabelValues("scissors", name).Inc()
// We don't want this to hang if the listener has already gone away.
select {
case errC <- err:
default:
}
ScissorsPanicsCaught.WithLabelValues(name).Inc()
}
}()
err := runnable(ctx)
if err != nil {
errC <- err
// We don't want this to hang if the listener has already gone away.
select {
case errC <- err:
default:
}
ScissorsErrorsCaught.WithLabelValues(name).Inc()
}
}()
}
func WrapWithScissors(runnable supervisor.Runnable, name string) supervisor.Runnable {
ScissorsErrors.WithLabelValues("scissors", name).Add(0)
ScissorsErrorsCaught.WithLabelValues(name).Add(0)
ScissorsPanicsCaught.WithLabelValues(name).Add(0)
return func(ctx context.Context) (result error) {
defer func() {
if r := recover(); r != nil {
@ -51,7 +69,7 @@ func WrapWithScissors(runnable supervisor.Runnable, name string) supervisor.Runn
default:
result = fmt.Errorf("%s: %v", name, x)
}
ScissorsErrors.WithLabelValues("scissors", name).Inc()
ScissorsPanicsCaught.WithLabelValues(name).Inc()
}
}()

View File

@ -5,10 +5,23 @@ import (
"errors"
"fmt"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/test-go/testify/require"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)
func getCounterValue(metric *prometheus.CounterVec, runnableName string) float64 {
var m = &dto.Metric{}
if err := metric.WithLabelValues(runnableName).Write(m); err != nil {
return 0
}
return m.Counter.GetValue()
}
func throwNil(ctx context.Context) error {
var x *int = nil
*x = 5
@ -78,3 +91,115 @@ func TestSupervisor(t *testing.T) {
)
}
}
func TestRunWithScissorsCleanExit(t *testing.T) {
ctx := context.Background()
errC := make(chan error)
itRan := make(chan bool, 1)
RunWithScissors(ctx, errC, "TestRunWithScissorsCleanExit", func(ctx context.Context) error {
itRan <- true
return nil
})
shouldHaveRun := <-itRan
require.Equal(t, true, shouldHaveRun)
// Need to wait a bit to make sure the scissors code completes without hanging.
time.Sleep(100 * time.Millisecond)
assert.Equal(t, 0.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsCleanExit"))
assert.Equal(t, 0.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsCleanExit"))
}
func TestRunWithScissorsPanicReturned(t *testing.T) {
ctx := context.Background()
errC := make(chan error)
itRan := make(chan bool, 1)
RunWithScissors(ctx, errC, "TestRunWithScissorsPanicReturned", func(ctx context.Context) error {
itRan <- true
panic("Some random panic")
})
var err error
select {
case <-ctx.Done():
break
case err = <-errC:
break
}
shouldHaveRun := <-itRan
require.Equal(t, true, shouldHaveRun)
assert.Error(t, err)
assert.Equal(t, "TestRunWithScissorsPanicReturned: Some random panic", err.Error())
assert.Equal(t, 0.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsPanicReturned"))
assert.Equal(t, 1.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsPanicReturned"))
}
func TestRunWithScissorsPanicDoesNotBlockWhenNoListener(t *testing.T) {
ctx := context.Background()
errC := make(chan error)
itRan := make(chan bool, 1)
RunWithScissors(ctx, errC, "TestRunWithScissorsPanicDoesNotBlockWhenNoListener", func(ctx context.Context) error {
itRan <- true
panic("Some random panic")
})
shouldHaveRun := <-itRan
require.Equal(t, true, shouldHaveRun)
// Need to wait a bit to make sure the scissors code completes without hanging.
time.Sleep(100 * time.Millisecond)
assert.Equal(t, 0.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsPanicDoesNotBlockWhenNoListener"))
assert.Equal(t, 1.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsPanicDoesNotBlockWhenNoListener"))
}
func TestRunWithScissorsErrorReturned(t *testing.T) {
ctx := context.Background()
errC := make(chan error)
itRan := make(chan bool, 1)
RunWithScissors(ctx, errC, "TestRunWithScissorsErrorReturned", func(ctx context.Context) error {
itRan <- true
return fmt.Errorf("Some random error")
})
var err error
select {
case <-ctx.Done():
break
case err = <-errC:
break
}
shouldHaveRun := <-itRan
require.Equal(t, true, shouldHaveRun)
assert.Error(t, err)
assert.Equal(t, "Some random error", err.Error())
assert.Equal(t, 1.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsErrorReturned"))
assert.Equal(t, 0.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsErrorReturned"))
}
func TestRunWithScissorsErrorDoesNotBlockWhenNoListener(t *testing.T) {
ctx := context.Background()
errC := make(chan error)
itRan := make(chan bool, 1)
RunWithScissors(ctx, errC, "TestRunWithScissorsErrorDoesNotBlockWhenNoListener", func(ctx context.Context) error {
itRan <- true
return fmt.Errorf("Some random error")
})
shouldHaveRun := <-itRan
require.Equal(t, true, shouldHaveRun)
// Need to wait a bit to make sure the scissors code completes without hanging.
time.Sleep(100 * time.Millisecond)
assert.Equal(t, 1.0, getCounterValue(ScissorsErrorsCaught, "TestRunWithScissorsErrorDoesNotBlockWhenNoListener"))
assert.Equal(t, 0.0, getCounterValue(ScissorsPanicsCaught, "TestRunWithScissorsErrorDoesNotBlockWhenNoListener"))
}