From 49907c4537eb9d08a85ae26dbeaaf56438d42edc Mon Sep 17 00:00:00 2001 From: Ivan Kozlovic Date: Mon, 16 Oct 2023 15:23:00 -0600 Subject: [PATCH] [FIXED] Configuration Reload: possible panic if done during Shutdown If a configuration reload is issued as the server is being shutdown, we could get 2 different panics. One due to JetStream if an account is JetStream enabled, and one due to the send to a go channel that has been closed. ``` panic: send on closed channel [recovered] panic: send on closed channel goroutine 440 [running]: testing.tRunner.func1.2({0x1038d58e0, 0x1039e1270}) /usr/local/go/src/testing/testing.go:1545 +0x274 testing.tRunner.func1() /usr/local/go/src/testing/testing.go:1548 +0x448 panic({0x1038d58e0?, 0x1039e1270?}) /usr/local/go/src/runtime/panic.go:920 +0x26c github.com/nats-io/nats-server/v2/server.(*Server).reloadAuthorization(0xc00024fb00) /Users/ik/dev/go/src/github.com/nats-io/nats-server/server/reload.go:1998 +0x788 github.com/nats-io/nats-server/v2/server.(*Server).applyOptions(0xc00024fb00, 0xc00021dc00, {0xc00038e4e0, 0x2, 0xc00021dc28?}) /Users/ik/dev/go/src/github.com/nats-io/nats-server/server/reload.go:1746 +0x2b8 github.com/nats-io/nats-server/v2/server.(*Server).reloadOptions(0xc000293500?, 0xc000118a80, 0xc000293500) /Users/ik/dev/go/src/github.com/nats-io/nats-server/server/reload.go:1121 +0x178 github.com/nats-io/nats-server/v2/server.(*Server).ReloadOptions(0xc00024fb00, 0xc000293500) /Users/ik/dev/go/src/github.com/nats-io/nats-server/server/reload.go:1060 +0x368 github.com/nats-io/nats-server/v2/server.(*Server).Reload(0xc00024fb00) /Users/ik/dev/go/src/github.com/nats-io/nats-server/server/reload.go:995 +0x104 ``` and ``` panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x2 addr=0x0 pc=0x10077b224] goroutine 8 [running]: testing.tRunner.func1.2({0x101351640, 0x101b7d2a0}) /usr/local/go/src/testing/testing.go:1545 +0x274 testing.tRunner.func1() /usr/local/go/src/testing/testing.go:1548 +0x448 panic({0x101351640?, 0x101b7d2a0?}) /usr/local/go/src/runtime/panic.go:920 +0x26c github.com/nats-io/nats-server/v2/server.(*Account).EnableJetStream(0xc00020fb80, 0xc000220240) /Users/ivan/dev/go/src/github.com/nats-io/nats-server/server/jetstream.go:1045 +0xa4 github.com/nats-io/nats-server/v2/server.(*Server).configJetStream(0xc000226d80, 0xc00020fb80) /Users/ivan/dev/go/src/github.com/nats-io/nats-server/server/jetstream.go:707 +0xdc github.com/nats-io/nats-server/v2/server.(*Server).configAllJetStreamAccounts(0xc000226d80) /Users/ivan/dev/go/src/github.com/nats-io/nats-server/server/jetstream.go:768 +0x2b0 github.com/nats-io/nats-server/v2/server.(*Server).enableJetStreamAccounts(0xc000226d80?) /Users/ivan/dev/go/src/github.com/nats-io/nats-server/server/jetstream.go:637 +0x128 github.com/nats-io/nats-server/v2/server.(*Server).reloadAuthorization(0xc000226d80) /Users/ivan/dev/go/src/github.com/nats-io/nats-server/server/reload.go:2039 +0x93c github.com/nats-io/nats-server/v2/server.(*Server).applyOptions(0xc000226d80, 0xc000171c50, {0xc000074600, 0x2, 0xc000171c78?}) /Users/ivan/dev/go/src/github.com/nats-io/nats-server/server/reload.go:1746 +0x2b8 github.com/nats-io/nats-server/v2/server.(*Server).reloadOptions(0xc000276000?, 0xc0000a6000, 0xc000276000) /Users/ivan/dev/go/src/github.com/nats-io/nats-server/server/reload.go:1121 +0x178 github.com/nats-io/nats-server/v2/server.(*Server).ReloadOptions(0xc000226d80, 0xc000276000) /Users/ivan/dev/go/src/github.com/nats-io/nats-server/server/reload.go:1060 +0x368 github.com/nats-io/nats-server/v2/server.(*Server).Reload(0xc000226d80) /Users/ivan/dev/go/src/github.com/nats-io/nats-server/server/reload.go:995 +0x104 ``` Signed-off-by: Ivan Kozlovic --- server/events.go | 8 +++++--- server/jetstream.go | 4 ++++ server/reload_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/server/events.go b/server/events.go index 0f761a47..601ed85a 100644 --- a/server/events.go +++ b/server/events.go @@ -1598,14 +1598,16 @@ func (s *Server) shutdownEventing() { s.mu.Lock() clearTimer(&s.sys.sweeper) clearTimer(&s.sys.stmr) - sys := s.sys + rc := s.sys.resetCh + s.sys.resetCh = nil + wg := &s.sys.wg s.mu.Unlock() // We will queue up a shutdown event and wait for the // internal send loop to exit. s.sendShutdownEvent() - sys.wg.Wait() - close(sys.resetCh) + wg.Wait() + close(rc) s.mu.Lock() defer s.mu.Unlock() diff --git a/server/jetstream.go b/server/jetstream.go index 756e75a5..e029f4cd 100644 --- a/server/jetstream.go +++ b/server/jetstream.go @@ -1038,6 +1038,10 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro } s.mu.RLock() + if s.sys == nil { + s.mu.RUnlock() + return ErrServerNotRunning + } sendq := s.sys.sendq s.mu.RUnlock() diff --git a/server/reload_test.go b/server/reload_test.go index 9f0410c7..4fe67a55 100644 --- a/server/reload_test.go +++ b/server/reload_test.go @@ -6100,3 +6100,41 @@ func TestConfigReloadLeafNodeCompressionS2Auto(t *testing.T) { t.Fatalf("Expected compression info to have stayed the same, was %q - %p, got %q - %p", cm, cw, ncm, ncw) } } + +func TestConfigReloadNoPanicOnShutdown(t *testing.T) { + tmpl := ` + port: -1 + jetstream: true + accounts { + A { + users: [{user: A, password: pwd}] + %s + } + B { + users: [{user: B, password: pwd}] + } + } + ` + for i := 0; i < 50; i++ { + conf := createConfFile(t, []byte(fmt.Sprintf(tmpl, _EMPTY_))) + s, _ := RunServerWithConfig(conf) + // Don't use a defer s.Shutdown() here since it would prevent the panic + // to be reported (but the test would still fail because of a runtime timeout). + + err := os.WriteFile(conf, []byte(fmt.Sprintf(tmpl, "jetstream: true")), 0666) + require_NoError(t, err) + + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + time.Sleep(10 * time.Millisecond) + s.Shutdown() + }() + + time.Sleep(8 * time.Millisecond) + err = s.Reload() + require_NoError(t, err) + wg.Wait() + } +}