diff --git a/server/jetstream_api.go b/server/jetstream_api.go index 0be83e5f..12023985 100644 --- a/server/jetstream_api.go +++ b/server/jetstream_api.go @@ -581,6 +581,7 @@ const JSApiLeaderStepDownResponseType = "io.nats.jetstream.api.v1.meta_leader_st type JSApiMetaServerRemoveRequest struct { // Server name of the peer to be removed. Server string `json:"peer"` + ByID bool `json:"by_id,omitempty"` } // JSApiMetaServerRemoveResponse is the response to a peer removal request in the meta group. @@ -2215,6 +2216,13 @@ func (s *Server) jsLeaderServerRemoveRequest(sub *subscription, c *client, _ *Ac var found string js.mu.RLock() for _, p := range cc.meta.Peers() { + if req.ByID { + if p.ID == req.Server { + found = req.Server + break + } + continue + } si, ok := s.nodeToInfo.Load(p.ID) if ok && si.(nodeInfo).name == req.Server { found = p.ID diff --git a/server/jetstream_cluster_2_test.go b/server/jetstream_cluster_2_test.go index cf42cacb..01f4920e 100644 --- a/server/jetstream_cluster_2_test.go +++ b/server/jetstream_cluster_2_test.go @@ -7259,3 +7259,87 @@ func TestJetStreamClusterCompressedStreamMessages(t *testing.T) { t.Fatalf("Did not receive completion signal") } } + +func TestJetStreamClusterRemovePeerByID(t *testing.T) { + c := createJetStreamClusterExplicit(t, "R3S", 3) + defer c.shutdown() + + s := c.randomNonLeader() + nc, js := jsClientConnect(t, s) + defer nc.Close() + + _, err := js.AddStream(&nats.StreamConfig{ + Name: "TEST", + Subjects: []string{"foo", "bar"}, + Replicas: 3, + }) + require_NoError(t, err) + + // Wait for a leader + c.waitOnStreamLeader(globalAccountName, "TEST") + + // Get the name of the one that is not restarted + srvName := c.opts[2].ServerName + // And its node ID + peerID := c.servers[2].Node() + + nc.Close() + // Now stop the whole cluster + c.stopAll() + // Restart all but one + for i := 0; i < 2; i++ { + opts := c.opts[i] + s, o := RunServerWithConfig(opts.ConfigFile) + c.servers[i] = s + c.opts[i] = o + } + + c.waitOnClusterReadyWithNumPeers(2) + c.waitOnStreamLeader(globalAccountName, "TEST") + + // Now attempt to remove by name, this should fail because the cluster + // was restarted and names are not persisted. + ml := c.leader() + nc, err = nats.Connect(ml.ClientURL(), nats.UserInfo("admin", "s3cr3t!")) + require_NoError(t, err) + defer nc.Close() + + req := &JSApiMetaServerRemoveRequest{Server: srvName} + jsreq, err := json.Marshal(req) + require_NoError(t, err) + rmsg, err := nc.Request(JSApiRemoveServer, jsreq, 2*time.Second) + require_NoError(t, err) + + var resp JSApiMetaServerRemoveResponse + err = json.Unmarshal(rmsg.Data, &resp) + require_NoError(t, err) + require_True(t, resp.Error != nil) + require_True(t, IsNatsErr(resp.Error, JSClusterServerNotMemberErr)) + + // Now try by ID, but first with an ID that does not match any peerID + req.Server = "some_bad_id" + req.ByID = true + jsreq, err = json.Marshal(req) + require_NoError(t, err) + rmsg, err = nc.Request(JSApiRemoveServer, jsreq, 2*time.Second) + require_NoError(t, err) + + resp = JSApiMetaServerRemoveResponse{} + err = json.Unmarshal(rmsg.Data, &resp) + require_NoError(t, err) + require_True(t, resp.Error != nil) + require_True(t, IsNatsErr(resp.Error, JSClusterServerNotMemberErr)) + + // Now with the proper peer ID + req.Server = peerID + jsreq, err = json.Marshal(req) + require_NoError(t, err) + rmsg, err = nc.Request(JSApiRemoveServer, jsreq, 2*time.Second) + require_NoError(t, err) + + resp = JSApiMetaServerRemoveResponse{} + err = json.Unmarshal(rmsg.Data, &resp) + require_NoError(t, err) + require_True(t, resp.Error == nil) + require_True(t, resp.Success) +}