mirror of
https://github.com/gogrlx/nats-server.git
synced 2026-04-02 11:48:43 -07:00
Merge pull request #799 from nats-io/update_lame_duck_mode
Introduce some delay before closing clients in LameDuck mode.
This commit is contained in:
@@ -1488,7 +1488,12 @@ func (s *Server) reConnectToRoute(rURL *url.URL, rtype RouteType) {
|
||||
if tryForEver {
|
||||
delay += DEFAULT_ROUTE_RECONNECT
|
||||
}
|
||||
time.Sleep(delay)
|
||||
select {
|
||||
case <-time.After(delay):
|
||||
case <-s.quitCh:
|
||||
s.grWG.Done()
|
||||
return
|
||||
}
|
||||
s.connectToRoute(rURL, tryForEver)
|
||||
}
|
||||
|
||||
|
||||
@@ -38,6 +38,12 @@ import (
|
||||
"github.com/nats-io/gnatsd/logger"
|
||||
)
|
||||
|
||||
// Time to wait before starting to close clients when in LD mode.
|
||||
const lameDuckModeDefaultInitialDelay = int64(time.Second)
|
||||
|
||||
// Make this a variable so that we can change it during tests.
|
||||
var lameDuckModeInitialDelay = int64(lameDuckModeDefaultInitialDelay)
|
||||
|
||||
// Info is the information sent to clients to help them understand information
|
||||
// about this server.
|
||||
type Info struct {
|
||||
@@ -1626,8 +1632,16 @@ func (s *Server) lameDuckMode() {
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
t := time.NewTimer(10 * time.Second)
|
||||
s.Noticef("Closing existing clients")
|
||||
t := time.NewTimer(time.Duration(atomic.LoadInt64(&lameDuckModeInitialDelay)))
|
||||
// Delay start of closing of client connections in case
|
||||
// we have several servers that we want to signal to enter LD mode
|
||||
// and not have their clients reconnect to each other.
|
||||
select {
|
||||
case <-t.C:
|
||||
s.Noticef("Closing existing clients")
|
||||
case <-s.quitCh:
|
||||
return
|
||||
}
|
||||
for i, client := range clients {
|
||||
client.closeConnection(ServerShutdown)
|
||||
if batch == 1 || i%batch == 0 {
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -649,6 +650,9 @@ func TestProfilingNoTimeout(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestLameDuckMode(t *testing.T) {
|
||||
atomic.StoreInt64(&lameDuckModeInitialDelay, 0)
|
||||
defer atomic.StoreInt64(&lameDuckModeInitialDelay, lameDuckModeDefaultInitialDelay)
|
||||
|
||||
optsA := DefaultOptions()
|
||||
optsA.Cluster.Host = "127.0.0.1"
|
||||
srvA := RunServer(optsA)
|
||||
@@ -796,4 +800,50 @@ func TestLameDuckMode(t *testing.T) {
|
||||
checkClientsCount(t, srvB, total)
|
||||
|
||||
stopClientsAndSrvB(ncs)
|
||||
|
||||
// Now test that we introduce a delay before starting to close client connections.
|
||||
// This allows us to "signal" multiple servers and prevents their clients from reconnecting
|
||||
// to a server that is itself about to enter LD mode.
|
||||
atomic.StoreInt64(&lameDuckModeInitialDelay, int64(100*time.Millisecond))
|
||||
|
||||
optsA.LameDuckDuration = 10 * time.Millisecond
|
||||
srvA = RunServer(optsA)
|
||||
defer srvA.Shutdown()
|
||||
|
||||
optsB.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", srvA.ClusterAddr().Port))
|
||||
optsB.LameDuckDuration = 10 * time.Millisecond
|
||||
srvB = RunServer(optsB)
|
||||
defer srvB.Shutdown()
|
||||
|
||||
optsC := DefaultOptions()
|
||||
optsC.Routes = RoutesFromStr(fmt.Sprintf("nats://127.0.0.1:%d", srvA.ClusterAddr().Port))
|
||||
optsC.LameDuckDuration = 10 * time.Millisecond
|
||||
srvC := RunServer(optsC)
|
||||
defer srvC.Shutdown()
|
||||
|
||||
checkClusterFormed(t, srvA, srvB, srvC)
|
||||
|
||||
rt := int32(0)
|
||||
nc, err := nats.Connect(fmt.Sprintf("nats://127.0.0.1:%d", optsA.Port),
|
||||
nats.ReconnectWait(15*time.Millisecond),
|
||||
nats.ReconnectHandler(func(*nats.Conn) {
|
||||
atomic.AddInt32(&rt, 1)
|
||||
}))
|
||||
if err != nil {
|
||||
t.Fatalf("Error on connect: %v", err)
|
||||
}
|
||||
defer nc.Close()
|
||||
|
||||
go srvA.lameDuckMode()
|
||||
// Wait a bit, but less than lameDuckModeInitialDelay that we set in this
|
||||
// test to 100ms.
|
||||
time.Sleep(30 * time.Millisecond)
|
||||
go srvB.lameDuckMode()
|
||||
|
||||
srvA.grWG.Wait()
|
||||
srvB.grWG.Wait()
|
||||
checkClientsCount(t, srvC, 1)
|
||||
if n := atomic.LoadInt32(&rt); n != 1 {
|
||||
t.Fatalf("Expected client to reconnect only once, got %v", n)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user