Make sure preferred peer for stepdown is healthy.

Signed-off-by: Derek Collison <derek@nats.io>
This commit is contained in:
Derek Collison
2023-02-23 12:15:57 -08:00
parent 9d972642e9
commit 45859e6476
2 changed files with 35 additions and 15 deletions

View File

@@ -199,10 +199,10 @@ func TestJetStreamClusterMultiRestartBug(t *testing.T) {
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
checkFor(t, 20*time.Second, 250*time.Millisecond, func() error {
si, _ := js2.StreamInfo("TEST")
if si == nil || si.Cluster == nil {
t.Fatalf("Did not get stream info")
return fmt.Errorf("No stream info or cluster")
}
for _, pi := range si.Cluster.Replicas {
if !pi.Current {

View File

@@ -1281,7 +1281,6 @@ func (n *raft) StepDown(preferred ...string) error {
n.debug("Being asked to stepdown")
// See if we have up to date followers.
nowts := time.Now().UnixNano()
maybeLeader := noLeader
if len(preferred) > 0 {
if preferred[0] != _EMPTY_ {
@@ -1290,21 +1289,42 @@ func (n *raft) StepDown(preferred ...string) error {
preferred = nil
}
}
// Can't pick ourselves.
if maybeLeader == n.id {
maybeLeader = noLeader
preferred = nil
}
for peer, ps := range n.peers {
// If not us and alive and caughtup.
if peer != n.id && (nowts-ps.ts) < int64(hbInterval*3) {
if maybeLeader != noLeader && maybeLeader != peer {
continue
}
if si, ok := n.s.nodeToInfo.Load(peer); !ok || si.(nodeInfo).offline {
continue
}
n.debug("Looking at %q which is %v behind", peer, time.Duration(nowts-ps.ts))
maybeLeader = peer
break
nowts := time.Now().UnixNano()
// If we have a preferred check it first.
if maybeLeader != noLeader {
var isHealthy bool
if ps, ok := n.peers[maybeLeader]; ok {
si, ok := n.s.nodeToInfo.Load(maybeLeader)
isHealthy = ok && !si.(nodeInfo).offline && (nowts-ps.ts) < int64(hbInterval*3)
}
if !isHealthy {
maybeLeader = noLeader
}
}
// If we do not have a preferred at this point pick the first healthy one.
// Make sure not ourselves.
if maybeLeader == noLeader {
for peer, ps := range n.peers {
if peer == n.id {
continue
}
si, ok := n.s.nodeToInfo.Load(peer)
isHealthy := ok && !si.(nodeInfo).offline && (nowts-ps.ts) < int64(hbInterval*3)
if isHealthy {
maybeLeader = peer
break
}
}
}
stepdown := n.stepdown
n.Unlock()