// Copyright 2020-2022 The NATS Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !skip_js_tests && !skip_js_cluster_tests && !skip_js_cluster_tests_2 && !skip_js_super_cluster_tests // +build !skip_js_tests,!skip_js_cluster_tests,!skip_js_cluster_tests_2,!skip_js_super_cluster_tests package server import ( "encoding/json" "errors" "fmt" "math/rand" "net/http" "net/http/httptest" "reflect" "strings" "sync" "sync/atomic" "testing" "time" "github.com/nats-io/jwt/v2" "github.com/nats-io/nats.go" "github.com/nats-io/nkeys" ) func TestJetStreamSuperClusterMetaPlacement(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 3) defer sc.shutdown() // We want to influence where the meta leader will place itself when we ask the // current leader to stepdown. ml := sc.leader() cn := ml.ClusterName() var pcn string for _, c := range sc.clusters { if c.name != cn { pcn = c.name break } } // Client based API s := sc.randomServer() nc, err := nats.Connect(s.ClientURL(), nats.UserInfo("admin", "s3cr3t!")) if err != nil { t.Fatalf("Failed to create system client: %v", err) } defer nc.Close() stepdown := func(cn string) *JSApiLeaderStepDownResponse { req := &JSApiLeaderStepdownRequest{Placement: &Placement{Cluster: cn}} jreq, err := json.Marshal(req) if err != nil { t.Fatalf("Unexpected error: %v", err) } resp, err := nc.Request(JSApiLeaderStepDown, jreq, time.Second) if err != nil { t.Fatalf("Error on stepdown request: %v", err) } var sdr JSApiLeaderStepDownResponse if err := json.Unmarshal(resp.Data, &sdr); err != nil { t.Fatalf("Unexpected error: %v", err) } return &sdr } // Make sure we get correct errors for tags and bad or unavailable cluster placement. sdr := stepdown("C22") if sdr.Error == nil || !strings.Contains(sdr.Error.Description, "no replacement peer connected") { t.Fatalf("Got incorrect error result: %+v", sdr.Error) } // Should work. sdr = stepdown(pcn) if sdr.Error != nil { t.Fatalf("Got an error on stepdown: %+v", sdr.Error) } sc.waitOnLeader() ml = sc.leader() cn = ml.ClusterName() if cn != pcn { t.Fatalf("Expected new metaleader to be in cluster %q, got %q", pcn, cn) } } func TestJetStreamSuperClusterUniquePlacementTag(t *testing.T) { tmlp := ` listen: 127.0.0.1:-1 server_name: %s jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s', unique_tag: az} leaf {listen: 127.0.0.1:-1} cluster { name: %s listen: 127.0.0.1:%d routes = [%s] } # For access to system account. accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" 
} ] } } ` s := createJetStreamSuperClusterWithTemplateAndModHook(t, tmlp, 5, 2, func(serverName, clustername, storeDir, conf string) string { azTag := map[string]string{ "C1-S1": "az:same", "C1-S2": "az:same", "C1-S3": "az:same", "C1-S4": "az:same", "C1-S5": "az:same", "C2-S1": "az:1", "C2-S2": "az:2", "C2-S3": "az:1", "C2-S4": "az:2", "C2-S5": "az:1", } return conf + fmt.Sprintf("\nserver_tags: [cloud:%s-tag, %s]\n", clustername, azTag[serverName]) }, nil) defer s.shutdown() inDifferentAz := func(ci *nats.ClusterInfo) (bool, error) { t.Helper() if len(ci.Replicas) == 0 { return true, nil } // if R2 (has replica, this setup does not support R3), test if the server in a cluster picked the same az, // as determined by modulo2 of server number which aligns with az dummy := 0 srvnum1 := 0 srvnum2 := 0 if n, _ := fmt.Sscanf(ci.Leader, "C%d-S%d", &dummy, &srvnum1); n != 2 { return false, fmt.Errorf("couldn't parse leader") } if n, _ := fmt.Sscanf(ci.Replicas[0].Name, "C%d-S%d", &dummy, &srvnum2); n != 2 { return false, fmt.Errorf("couldn't parse replica") } return srvnum1%2 != srvnum2%2, nil } nc := natsConnect(t, s.randomServer().ClientURL()) defer nc.Close() js, err := nc.JetStream() require_NoError(t, err) for i, test := range []struct { placement *nats.Placement replicas int fail bool cluster string }{ // these pass because replica count is 1 {&nats.Placement{Tags: []string{"az:same"}}, 1, false, "C1"}, {&nats.Placement{Tags: []string{"cloud:C1-tag", "az:same"}}, 1, false, "C1"}, {&nats.Placement{Tags: []string{"cloud:C1-tag"}}, 1, false, "C1"}, // pass because az is set, which disables the filter {&nats.Placement{Tags: []string{"az:same"}}, 2, false, "C1"}, {&nats.Placement{Tags: []string{"cloud:C1-tag", "az:same"}}, 2, false, "C1"}, // fails because this cluster only has the same az {&nats.Placement{Tags: []string{"cloud:C1-tag"}}, 2, true, ""}, // fails because no 3 unique tags exist {&nats.Placement{Tags: []string{"cloud:C2-tag"}}, 3, true, ""}, {nil, 3, true, ""}, // pass because replica count is low enough {nil, 2, false, "C2"}, {&nats.Placement{Tags: []string{"cloud:C2-tag"}}, 2, false, "C2"}, // pass because az is provided {&nats.Placement{Tags: []string{"az:1"}}, 3, false, "C2"}, {&nats.Placement{Tags: []string{"az:2"}}, 2, false, "C2"}, } { name := fmt.Sprintf("test-%d", i) t.Run(name, func(t *testing.T) { si, err := js.AddStream(&nats.StreamConfig{Name: name, Replicas: test.replicas, Placement: test.placement}) if test.fail { require_Error(t, err) require_Contains(t, err.Error(), "no suitable peers for placement", "server tag not unique") return } require_NoError(t, err) if test.cluster != _EMPTY_ { require_Equal(t, si.Cluster.Name, test.cluster) } // skip placement test if tags call for a particular az if test.placement != nil && len(test.placement.Tags) > 0 { for _, tag := range test.placement.Tags { if strings.HasPrefix(tag, "az:") { return } } } diff, err := inDifferentAz(si.Cluster) require_NoError(t, err) require_True(t, diff) }) } t.Run("scale-up-test", func(t *testing.T) { // create enough streams so we hit it eventually for i := 0; i < 10; i++ { cfg := &nats.StreamConfig{Name: fmt.Sprintf("scale-up-%d", i), Replicas: 1, Placement: &nats.Placement{Tags: []string{"cloud:C2-tag"}}} si, err := js.AddStream(cfg) require_NoError(t, err) require_Equal(t, si.Cluster.Name, "C2") cfg.Replicas = 2 si, err = js.UpdateStream(cfg) require_NoError(t, err) require_Equal(t, si.Cluster.Name, "C2") checkFor(t, 10, 250*time.Millisecond, func() error { if si, err := js.StreamInfo(cfg.Name); 
err != nil { return err } else if diff, err := inDifferentAz(si.Cluster); err != nil { return err } else if !diff { return fmt.Errorf("not in different AZ") } return nil }) } }) } func TestJetStreamSuperClusterBasics(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 3) defer sc.shutdown() // Client based API s := sc.randomServer() nc, js := jsClientConnect(t, s) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{Name: "TEST", Replicas: 3}) if err != nil { t.Fatalf("Unexpected error: %v", err) } // Send in 10 messages. msg, toSend := []byte("Hello JS Clustering"), 10 for i := 0; i < toSend; i++ { if _, err = js.Publish("TEST", msg); err != nil { t.Fatalf("Unexpected publish error: %v", err) } } // Now grab info for this stream. si, err := js.StreamInfo("TEST") if err != nil { t.Fatalf("Unexpected error: %v", err) } if si == nil || si.Config.Name != "TEST" { t.Fatalf("StreamInfo is not correct %+v", si) } // Check active state as well, which shows that the owner answered. if si.State.Msgs != uint64(toSend) { t.Fatalf("Expected %d msgs, got bad state: %+v", toSend, si.State) } // Check request origin placement. if si.Cluster.Name != s.ClusterName() { t.Fatalf("Expected stream to be placed in %q, but got %q", s.ClusterName(), si.Cluster.Name) } // Check consumers. sub, err := js.SubscribeSync("TEST") if err != nil { t.Fatalf("Unexpected error: %v", err) } checkSubsPending(t, sub, toSend) ci, err := sub.ConsumerInfo() if err != nil { t.Fatalf("Unexpected error: %v", err) } if ci.Delivered.Consumer != uint64(toSend) || ci.NumAckPending != toSend { t.Fatalf("ConsumerInfo is not correct: %+v", ci) } // Now check we can place a stream. pcn := "C3" scResp, err := js.AddStream(&nats.StreamConfig{ Name: "TEST2", Placement: &nats.Placement{Cluster: pcn}, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } if scResp.Cluster.Name != pcn { t.Fatalf("Expected the stream to be placed in %q, got %q", pcn, scResp.Cluster.Name) } } // Test that consumer interest across gateways and superclusters is properly identified in a remote cluster. func TestJetStreamSuperClusterCrossClusterConsumerInterest(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 3) defer sc.shutdown() // Since we need all of the peers accounted for to add the stream, wait for all to be present. sc.waitOnPeerCount(9) // Client based API - Connect to Cluster C1. Stream and consumer will live in C2. s := sc.clusterForName("C1").randomServer() nc, js := jsClientConnect(t, s) defer nc.Close() pcn := "C2" _, err := js.AddStream(&nats.StreamConfig{Name: "foo", Replicas: 3, Placement: &nats.Placement{Cluster: pcn}}) if err != nil { t.Fatalf("Unexpected error: %v", err) } // Pull based first. sub, err := js.PullSubscribe("foo", "dlc") if err != nil { t.Fatalf("Unexpected error: %v", err) } // Send a message. if _, err = js.Publish("foo", []byte("CCI")); err != nil { t.Fatalf("Unexpected publish error: %v", err) } fetchMsgs(t, sub, 1, 5*time.Second) // Now check push based delivery. sub, err = js.SubscribeSync("foo", nats.Durable("rip")) if err != nil { t.Fatalf("Unexpected error: %v", err) } checkSubsPending(t, sub, 1) // Send another message.
if _, err = js.Publish("foo", []byte("CCI")); err != nil { t.Fatalf("Unexpected publish error: %v", err) } checkSubsPending(t, sub, 2) } func TestJetStreamSuperClusterPeerReassign(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 3) defer sc.shutdown() // Client based API s := sc.randomServer() nc, js := jsClientConnect(t, s) defer nc.Close() pcn := "C2" // Create a stream in C2 that sources TEST _, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Placement: &nats.Placement{Cluster: pcn}, Replicas: 3, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } // Send in 10 messages. msg, toSend := []byte("Hello JS Clustering"), 10 for i := 0; i < toSend; i++ { if _, err = js.Publish("TEST", msg); err != nil { t.Fatalf("Unexpected publish error: %v", err) } } // Now grab info for this stream. si, err := js.StreamInfo("TEST") if err != nil { t.Fatalf("Unexpected error: %v", err) } if si == nil || si.Config.Name != "TEST" { t.Fatalf("StreamInfo is not correct %+v", si) } // Check active state as well, shows that the owner answered. if si.State.Msgs != uint64(toSend) { t.Fatalf("Expected %d msgs, got bad state: %+v", toSend, si.State) } // Check request origin placement. if si.Cluster.Name != pcn { t.Fatalf("Expected stream to be placed in %q, but got %q", s.ClusterName(), si.Cluster.Name) } // Now remove a peer that is assigned to the stream. rc := sc.clusterForName(pcn) rs := rc.randomNonStreamLeader("$G", "TEST") rc.removeJetStream(rs) // Check the stream info is eventually correct. checkFor(t, 2*time.Second, 50*time.Millisecond, func() error { si, err := js.StreamInfo("TEST") if err != nil { return fmt.Errorf("Could not fetch stream info: %v", err) } if len(si.Cluster.Replicas) != 2 { return fmt.Errorf("Expected 2 replicas, got %d", len(si.Cluster.Replicas)) } for _, peer := range si.Cluster.Replicas { if !peer.Current { return fmt.Errorf("Expected replica to be current: %+v", peer) } if !strings.HasPrefix(peer.Name, pcn) { t.Fatalf("Stream peer reassigned to wrong cluster: %q", peer.Name) } } return nil }) } func TestJetStreamSuperClusterInterestOnlyMode(t *testing.T) { GatewayDoNotForceInterestOnlyMode(true) defer GatewayDoNotForceInterestOnlyMode(false) template := ` listen: 127.0.0.1:-1 server_name: %s jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'} accounts { one { jetstream: enabled users [{user: one, password: password}] } two { %s users [{user: two, password: password}] } } cluster { listen: 127.0.0.1:%d name: %s routes = ["nats://127.0.0.1:%d"] } gateway { name: %s listen: 127.0.0.1:%d gateways = [{name: %s, urls: ["nats://127.0.0.1:%d"]}] } ` storeDir1 := t.TempDir() conf1 := createConfFile(t, []byte(fmt.Sprintf(template, "S1", storeDir1, "", 23222, "A", 23222, "A", 11222, "B", 11223))) s1, o1 := RunServerWithConfig(conf1) defer s1.Shutdown() storeDir2 := t.TempDir() conf2 := createConfFile(t, []byte(fmt.Sprintf(template, "S2", storeDir2, "", 23223, "B", 23223, "B", 11223, "A", 11222))) s2, o2 := RunServerWithConfig(conf2) defer s2.Shutdown() waitForInboundGateways(t, s1, 1, 2*time.Second) waitForInboundGateways(t, s2, 1, 2*time.Second) waitForOutboundGateways(t, s1, 1, 2*time.Second) waitForOutboundGateways(t, s2, 1, 2*time.Second) nc1 := natsConnect(t, fmt.Sprintf("nats://two:password@127.0.0.1:%d", o1.Port)) defer nc1.Close() nc1.Publish("foo", []byte("some message")) nc1.Flush() nc2 := natsConnect(t, fmt.Sprintf("nats://two:password@127.0.0.1:%d", o2.Port)) defer nc2.Close() nc2.Publish("bar", []byte("some message")) nc2.Flush() checkMode := 
func(accName string, expectedMode GatewayInterestMode) { t.Helper() checkFor(t, 2*time.Second, 15*time.Millisecond, func() error { servers := []*Server{s1, s2} for _, s := range servers { var gws []*client s.getInboundGatewayConnections(&gws) for _, gw := range gws { var mode GatewayInterestMode gw.mu.Lock() ie := gw.gw.insim[accName] if ie != nil { mode = ie.mode } gw.mu.Unlock() if ie == nil { return fmt.Errorf("Account %q not in map", accName) } if mode != expectedMode { return fmt.Errorf("Expected account %q mode to be %v, got: %v", accName, expectedMode, mode) } } } return nil }) } checkMode("one", InterestOnly) checkMode("two", Optimistic) // Now change account "two" to enable JS changeCurrentConfigContentWithNewContent(t, conf1, []byte(fmt.Sprintf(template, "S1", storeDir1, "jetstream: enabled", 23222, "A", 23222, "A", 11222, "B", 11223))) changeCurrentConfigContentWithNewContent(t, conf2, []byte(fmt.Sprintf(template, "S2", storeDir2, "jetstream: enabled", 23223, "B", 23223, "B", 11223, "A", 11222))) if err := s1.Reload(); err != nil { t.Fatalf("Error on s1 reload: %v", err) } if err := s2.Reload(); err != nil { t.Fatalf("Error on s2 reload: %v", err) } checkMode("one", InterestOnly) checkMode("two", InterestOnly) } func TestJetStreamSuperClusterConnectionCount(t *testing.T) { sc := createJetStreamSuperClusterWithTemplate(t, jsClusterAccountsTempl, 3, 2) defer sc.shutdown() sysNc := natsConnect(t, sc.randomServer().ClientURL(), nats.UserInfo("admin", "s3cr3t!")) defer sysNc.Close() _, err := sysNc.Request(fmt.Sprintf(accDirectReqSubj, "ONE", "CONNS"), nil, 100*time.Millisecond) // This is a timeout as the server only responds when it has connections. // Not convinced this should be that way, but also not the issue to investigate here. require_True(t, err == nats.ErrTimeout) for i := 1; i <= 2; i++ { func() { nc := natsConnect(t, sc.clusterForName(fmt.Sprintf("C%d", i)).randomServer().ClientURL()) defer nc.Close() js, err := nc.JetStream() require_NoError(t, err) name := fmt.Sprintf("foo%d", i) _, err = js.AddStream(&nats.StreamConfig{ Name: name, Subjects: []string{name}, Replicas: 3}) require_NoError(t, err) }() } func() { nc := natsConnect(t, sc.clusterForName("C1").randomServer().ClientURL()) defer nc.Close() js, err := nc.JetStream() require_NoError(t, err) _, err = js.AddStream(&nats.StreamConfig{ Name: "src", Sources: []*nats.StreamSource{{Name: "foo1"}, {Name: "foo2"}}, Replicas: 3}) require_NoError(t, err) }() func() { nc := natsConnect(t, sc.clusterForName("C2").randomServer().ClientURL()) defer nc.Close() js, err := nc.JetStream() require_NoError(t, err) _, err = js.AddStream(&nats.StreamConfig{ Name: "mir", Mirror: &nats.StreamSource{Name: "foo2"}, Replicas: 3}) require_NoError(t, err) }() // There should be no active NATS CLIENT connections, but we still need // to wait a little bit...
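// Note: sourcing and mirroring across clusters runs over the servers' internal clients and route/gateway
// traffic, so the ONE account is expected to keep reporting zero client connections, which is what we assert below.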
checkFor(t, 2*time.Second, 15*time.Millisecond, func() error { _, err := sysNc.Request(fmt.Sprintf(accDirectReqSubj, "ONE", "CONNS"), nil, 100*time.Millisecond) if err != nats.ErrTimeout { return fmt.Errorf("Expected timeout, got %v", err) } return nil }) sysNc.Close() s := sc.randomServer() checkFor(t, 5*time.Second, 100*time.Millisecond, func() error { acc, err := s.lookupAccount("ONE") if err != nil { t.Fatalf("Could not look up account: %v", err) } if n := acc.NumConnections(); n != 0 { return fmt.Errorf("Expected no connections, got %d", n) } return nil }) } func TestJetStreamSuperClusterConsumersBrokenGateways(t *testing.T) { sc := createJetStreamSuperCluster(t, 1, 2) defer sc.shutdown() // Client based API s := sc.clusterForName("C1").randomServer() nc, js := jsClientConnect(t, s) defer nc.Close() // This will be in C1. _, err := js.AddStream(&nats.StreamConfig{Name: "TEST"}) if err != nil { t.Fatalf("Unexpected error: %v", err) } // Create a stream in C2 that sources TEST _, err = js.AddStream(&nats.StreamConfig{ Name: "S", Placement: &nats.Placement{Cluster: "C2"}, Sources: []*nats.StreamSource{{Name: "TEST"}}, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } // Wait for direct consumer to get registered and detect interest across GW. time.Sleep(time.Second) // Send 100 msgs over ~50ms in a separate Go routine. msg, toSend, done := []byte("Hello"), 100, make(chan bool) go func() { // Send in the messages. for i := 0; i < toSend; i++ { if _, err = js.Publish("TEST", msg); err != nil { t.Errorf("Unexpected publish error: %v", err) } time.Sleep(500 * time.Microsecond) } done <- true }() breakGW := func() { s.gateway.Lock() gw := s.gateway.out["C2"] s.gateway.Unlock() if gw != nil { gw.closeConnection(ClientClosed) } } // Wait until about halfway through. time.Sleep(20 * time.Millisecond) // Now break GW connection. breakGW() // Wait for GW to reform. for _, c := range sc.clusters { for _, s := range c.servers { waitForOutboundGateways(t, s, 1, 2*time.Second) } } select { case <-done: case <-time.After(2 * time.Second): t.Fatalf("Did not complete sending first batch of messages") } // Make sure we can deal with data loss at the end. checkFor(t, 10*time.Second, 250*time.Millisecond, func() error { si, err := js.StreamInfo("S") if err != nil { t.Fatalf("Unexpected error: %v", err) } if si.State.Msgs != 100 { return fmt.Errorf("Expected to have %d messages, got %d", 100, si.State.Msgs) } return nil }) // Now send 100 more. Will also break the GW here in the middle. for i := 0; i < toSend; i++ { if _, err = js.Publish("TEST", msg); err != nil { t.Fatalf("Unexpected publish error: %v", err) } if i == 50 { breakGW() } } // Wait for GW to reform.
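// Every server should again report one outbound gateway connection to the other cluster once the reconnect completes.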
for _, c := range sc.clusters { for _, s := range c.servers { waitForOutboundGateways(t, s, 1, 2*time.Second) } } si, err := js.StreamInfo("TEST") if err != nil { t.Fatalf("Unexpected error: %v", err) } if si.State.Msgs != 200 { t.Fatalf("Expected to have %d messages, got %d", 200, si.State.Msgs) } checkFor(t, 10*time.Second, 250*time.Millisecond, func() error { si, err := js.StreamInfo("S") if err != nil { return fmt.Errorf("Unexpected error: %v", err) } if si.State.Msgs != 200 { return fmt.Errorf("Expected to have %d messages, got %d", 200, si.State.Msgs) } return nil }) } func TestJetStreamSuperClusterLeafNodesWithSharedSystemAccountAndSameDomain(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 2) defer sc.shutdown() lnc := sc.createLeafNodes("LNC", 2) defer lnc.shutdown() // We want to make sure there is only one leader and it's always in the supercluster. sc.waitOnLeader() if ml := lnc.leader(); ml != nil { t.Fatalf("Detected a meta-leader in the leafnode cluster: %s", ml) } // leafnodes should have been added into the overall peer count. sc.waitOnPeerCount(8) // Check here that we auto detect sharing system account as well and auto place the correct // deny imports and exports. ls := lnc.randomServer() if ls == nil { t.Fatalf("Expected a leafnode server, got none") } gacc := ls.globalAccount().GetName() ls.mu.Lock() var hasDE, hasDI bool for _, ln := range ls.leafs { ln.mu.Lock() if ln.leaf.remote.RemoteLeafOpts.LocalAccount == gacc { re := ln.perms.pub.deny.Match(jsAllAPI) hasDE = len(re.psubs)+len(re.qsubs) > 0 rs := ln.perms.sub.deny.Match(jsAllAPI) hasDI = len(rs.psubs)+len(rs.qsubs) > 0 } ln.mu.Unlock() } ls.mu.Unlock() if !hasDE { t.Fatalf("No deny export on global account") } if !hasDI { t.Fatalf("No deny import on global account") } // Make a stream by connecting to the leafnode cluster. Make sure placement is correct. // Client based API nc, js := jsClientConnect(t, lnc.randomServer()) defer nc.Close() si, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo", "bar"}, Replicas: 2, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } if si.Cluster.Name != "LNC" { t.Fatalf("Expected default placement to be %q, got %q", "LNC", si.Cluster.Name) } // Now make sure placement also works if we want to place in a cluster in the supercluster. pcn := "C2" si, err = js.AddStream(&nats.StreamConfig{ Name: "TEST2", Subjects: []string{"baz"}, Replicas: 2, Placement: &nats.Placement{Cluster: pcn}, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } if si.Cluster.Name != pcn { t.Fatalf("Expected default placement to be %q, got %q", pcn, si.Cluster.Name) } } func TestJetStreamSuperClusterLeafNodesWithSharedSystemAccountAndDifferentDomain(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 2) defer sc.shutdown() lnc := sc.createLeafNodesWithDomain("LNC", 2, "LEAFDOMAIN") defer lnc.shutdown() // Because the domains differ, the supercluster and the leafnode cluster each elect their own meta leader. sc.waitOnLeader() lnc.waitOnLeader() // Even though the system account is shared, the peer counts stay separate per domain. sc.waitOnPeerCount(6) lnc.waitOnPeerCount(2) // Check here that we auto detect sharing system account as well and auto place the correct // deny imports and exports.
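// When the system account is shared over the leaf connection, the server is expected to automatically add
// deny pub/sub permissions for the JS API subjects (jsAllAPI) on the global account, so each domain only
// answers its own JS API traffic; the checks below verify both directions were installed.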
ls := lnc.randomServer() if ls == nil { t.Fatalf("Expected a leafnode server, got none") } gacc := ls.globalAccount().GetName() ls.mu.Lock() var hasDE, hasDI bool for _, ln := range ls.leafs { ln.mu.Lock() if ln.leaf.remote.RemoteLeafOpts.LocalAccount == gacc { re := ln.perms.pub.deny.Match(jsAllAPI) hasDE = len(re.psubs)+len(re.qsubs) > 0 rs := ln.perms.sub.deny.Match(jsAllAPI) hasDI = len(rs.psubs)+len(rs.qsubs) > 0 } ln.mu.Unlock() } ls.mu.Unlock() if !hasDE { t.Fatalf("No deny export on global account") } if !hasDI { t.Fatalf("No deny import on global account") } // Make a stream by connecting to the leafnode cluster. Make sure placement is correct. // Client based API nc, js := jsClientConnect(t, lnc.randomServer()) defer nc.Close() si, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo", "bar"}, Replicas: 2, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } if si.Cluster.Name != "LNC" { t.Fatalf("Expected default placement to be %q, got %q", "LNC", si.Cluster.Name) } // Now make sure placement does not work for a cluster in a different domain. pcn := "C2" _, err = js.AddStream(&nats.StreamConfig{ Name: "TEST2", Subjects: []string{"baz"}, Replicas: 2, Placement: &nats.Placement{Cluster: pcn}, }) if err == nil || !strings.Contains(err.Error(), "no suitable peers for placement") { t.Fatalf("Expected no suitable peers for placement, got: %v", err) } } func TestJetStreamSuperClusterSingleLeafNodeWithSharedSystemAccount(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 2) defer sc.shutdown() ln := sc.createSingleLeafNode(true) defer ln.Shutdown() // We want to make sure there is only one leader and it's always in the supercluster. sc.waitOnLeader() // leafnodes should have been added into the overall peer count. sc.waitOnPeerCount(7) // Now make sure we can place a stream in the leaf node. // First connect to the leafnode server itself. nc, js := jsClientConnect(t, ln) defer nc.Close() si, err := js.AddStream(&nats.StreamConfig{ Name: "TEST1", Subjects: []string{"foo"}, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } if si.Cluster.Name != "LNS" { t.Fatalf("Expected to be placed in leafnode with %q as cluster name, got %q", "LNS", si.Cluster.Name) } // Now check we can place here as well, but connect to the hub. nc, js = jsClientConnect(t, sc.randomServer()) defer nc.Close() si, err = js.AddStream(&nats.StreamConfig{ Name: "TEST2", Subjects: []string{"bar"}, Placement: &nats.Placement{Cluster: "LNS"}, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } if si.Cluster.Name != "LNS" { t.Fatalf("Expected to be placed in leafnode with %q as cluster name, got %q", "LNS", si.Cluster.Name) } } // Issue reported with superclusters and leafnodes where the first few get-next requests for pull subscribers // have the wrong subject. func TestJetStreamSuperClusterGetNextRewrite(t *testing.T) { sc := createJetStreamSuperClusterWithTemplate(t, jsClusterAccountsTempl, 2, 2) defer sc.shutdown() // Will connect the leafnode to cluster C1. We will then connect the "client" to cluster C2 to cross gateways. ln := sc.clusterForName("C1").createSingleLeafNodeNoSystemAccountAndEnablesJetStreamWithDomain("C", "nojs") defer ln.Shutdown() c2 := sc.clusterForName("C2") nc, js := jsClientConnectEx(t, c2.randomServer(), "C", nats.UserInfo("nojs", "p")) defer nc.Close() // Create a stream and add messages.
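// The client is connected through the "C" domain across the gateway, so pull (get-next) requests are sent
// with the $JS.C.API prefix; the fetch further below should still deliver messages with their original
// subject ("foo") once the request subject is rewritten.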
if _, err := js.AddStream(&nats.StreamConfig{Name: "foo"}); err != nil { t.Fatalf("Unexpected error: %v", err) } for i := 0; i < 10; i++ { if _, err := js.Publish("foo", []byte("ok")); err != nil { t.Fatalf("Unexpected publish error: %v", err) } } // Pull messages and make sure subject rewrite works. sub, err := js.PullSubscribe("foo", "dlc") if err != nil { t.Fatalf("Unexpected error: %v", err) } for _, m := range fetchMsgs(t, sub, 5, time.Second) { if m.Subject != "foo" { t.Fatalf("Expected %q as subject but got %q", "foo", m.Subject) } } } func TestJetStreamSuperClusterEphemeralCleanup(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 2) defer sc.shutdown() // Create a stream in cluster 0 s := sc.clusters[0].randomServer() nc, js := jsClientConnect(t, s) defer nc.Close() for _, test := range []struct { name string sourceInCluster int streamName string sourceName string }{ {"local", 0, "TEST1", "S1"}, {"remote", 1, "TEST2", "S2"}, } { t.Run(test.name, func(t *testing.T) { if _, err := js.AddStream(&nats.StreamConfig{Name: test.streamName, Replicas: 3}); err != nil { t.Fatalf("Error adding %q stream: %v", test.streamName, err) } if _, err := js.Publish(test.streamName, []byte("hello")); err != nil { t.Fatalf("Unexpected publish error: %v", err) } // Now create a source for that stream, either in same or remote cluster. s2 := sc.clusters[test.sourceInCluster].randomServer() nc2, js2 := jsClientConnect(t, s2) defer nc2.Close() if _, err := js2.AddStream(&nats.StreamConfig{ Name: test.sourceName, Storage: nats.FileStorage, Sources: []*nats.StreamSource{{Name: test.streamName}}, Replicas: 1, }); err != nil { t.Fatalf("Error adding source stream: %v", err) } // Check that TEST(n) has 1 consumer and that S(n) is created and has 1 message. checkFor(t, 2*time.Second, 100*time.Millisecond, func() error { si, err := js2.StreamInfo(test.sourceName) if err != nil { return fmt.Errorf("Could not get stream info: %v", err) } if si.State.Msgs != 1 { return fmt.Errorf("Expected 1 msg, got state: %+v", si.State) } return nil }) // Get the consumer because we will want to artificially reduce // the delete threshold. leader := sc.clusters[0].streamLeader("$G", test.streamName) mset, err := leader.GlobalAccount().lookupStream(test.streamName) if err != nil { t.Fatalf("Expected to find a stream for %q, got %v", test.streamName, err) } cons := mset.getConsumers()[0] cons.mu.Lock() cons.dthresh = 1250 * time.Millisecond active := cons.active dtimerSet := cons.dtmr != nil deliver := cons.cfg.DeliverSubject cons.mu.Unlock() if !active || dtimerSet { t.Fatalf("Invalid values for active=%v dtimerSet=%v", active, dtimerSet) } // To add to the mix, let's create a local interest on the delivery subject // and stop it. This is to ensure that this does not stop timers that should // still be running and monitor the GW interest. sub := natsSubSync(t, nc, deliver) natsFlush(t, nc) natsUnsub(t, sub) natsFlush(t, nc) // Now remove the "S(n)" stream... if err := js2.DeleteStream(test.sourceName); err != nil { t.Fatalf("Error deleting stream: %v", err) } // Now check that the stream S(n) is really removed and that // the consumer is gone for stream TEST(n). checkFor(t, 5*time.Second, 25*time.Millisecond, func() error { // First, make sure that stream S(n) has disappeared. 
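// Then check that the origin stream dropped the internal consumer that was feeding the source,
// i.e. numDirectConsumers() should drop back to zero.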
if _, err := js2.StreamInfo(test.sourceName); err == nil { return fmt.Errorf("Stream %q should no longer exist", test.sourceName) } if ndc := mset.numDirectConsumers(); ndc != 0 { return fmt.Errorf("Expected %q stream to have 0 consumers, got %v", test.streamName, ndc) } return nil }) }) } } func TestJetStreamSuperClusterGetNextSubRace(t *testing.T) { sc := createJetStreamSuperClusterWithTemplate(t, jsClusterAccountsTempl, 2, 2) defer sc.shutdown() // Will connect the leafnode to cluster C1. We will then connect the "client" to cluster C2 to cross gateways. ln := sc.clusterForName("C1").createSingleLeafNodeNoSystemAccountAndEnablesJetStreamWithDomain("C", "nojs") defer ln.Shutdown() // Shutdown 1 of the server from C1, (the one LN is not connected to) for _, s := range sc.clusterForName("C1").servers { s.mu.Lock() if len(s.leafs) == 0 { s.mu.Unlock() s.Shutdown() break } s.mu.Unlock() } // Wait on meta leader in case shutdown of server above caused an election. sc.waitOnLeader() var c2Srv *Server // Take the server from C2 that has no inbound from C1. c2 := sc.clusterForName("C2") for _, s := range c2.servers { var gwsa [2]*client gws := gwsa[:0] s.getInboundGatewayConnections(&gws) if len(gws) == 0 { c2Srv = s break } } if c2Srv == nil { t.Fatalf("Both servers in C2 had an inbound GW connection!") } nc, js := jsClientConnectEx(t, c2Srv, "C", nats.UserInfo("nojs", "p")) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{Name: "foo"}) require_NoError(t, err) _, err = js.AddConsumer("foo", &nats.ConsumerConfig{Durable: "dur", AckPolicy: nats.AckExplicitPolicy}) require_NoError(t, err) for i := 0; i < 100; i++ { sendStreamMsg(t, nc, "foo", "ok") } // Wait for all messages to appear in the consumer checkFor(t, 2*time.Second, 50*time.Millisecond, func() error { ci, err := js.ConsumerInfo("foo", "dur") if err != nil { return err } if n := ci.NumPending; n != 100 { return fmt.Errorf("Expected 100 msgs, got %v", n) } return nil }) req := &JSApiConsumerGetNextRequest{Batch: 1, Expires: 5 * time.Second} jreq, err := json.Marshal(req) require_NoError(t, err) // Create this by hand here to make sure we create the subscription // on the reply subject for every single request nextSubj := fmt.Sprintf(JSApiRequestNextT, "foo", "dur") nextSubj = "$JS.C.API" + strings.TrimPrefix(nextSubj, "$JS.API") for i := 0; i < 100; i++ { inbox := nats.NewInbox() sub := natsSubSync(t, nc, inbox) natsPubReq(t, nc, nextSubj, inbox, jreq) msg := natsNexMsg(t, sub, time.Second) if len(msg.Header) != 0 && string(msg.Data) != "ok" { t.Fatalf("Unexpected message: header=%+v data=%s", msg.Header, msg.Data) } sub.Unsubscribe() } } func TestJetStreamSuperClusterPullConsumerAndHeaders(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 2) defer sc.shutdown() c1 := sc.clusterForName("C1") c2 := sc.clusterForName("C2") nc, js := jsClientConnect(t, c1.randomServer()) defer nc.Close() if _, err := js.AddStream(&nats.StreamConfig{Name: "ORIGIN"}); err != nil { t.Fatalf("Unexpected error: %v", err) } toSend := 50 for i := 0; i < toSend; i++ { if _, err := js.Publish("ORIGIN", []byte("ok")); err != nil { t.Fatalf("Unexpected publish error: %v", err) } } nc2, js2 := jsClientConnect(t, c2.randomServer()) defer nc2.Close() _, err := js2.AddStream(&nats.StreamConfig{ Name: "S", Sources: []*nats.StreamSource{{Name: "ORIGIN"}}, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } // Wait for them to be in the sourced stream. 
checkFor(t, 5*time.Second, 250*time.Millisecond, func() error { if si, _ := js2.StreamInfo("S"); si.State.Msgs != uint64(toSend) { return fmt.Errorf("Expected %d msgs for %q, got %d", toSend, "S", si.State.Msgs) } return nil }) // Now create a pull consumer for the sourced stream. _, err = js2.AddConsumer("S", &nats.ConsumerConfig{Durable: "dlc", AckPolicy: nats.AckExplicitPolicy}) if err != nil { t.Fatalf("Unexpected error: %v", err) } // Now we will connect and request the next message from each server in C1 cluster and check that headers remain in place. for _, s := range c1.servers { nc, err := nats.Connect(s.ClientURL()) if err != nil { t.Fatalf("Unexpected error: %v", err) } defer nc.Close() m, err := nc.Request("$JS.API.CONSUMER.MSG.NEXT.S.dlc", nil, 2*time.Second) if err != nil { t.Fatalf("Unexpected error: %v", err) } if len(m.Header) != 1 { t.Fatalf("Expected 1 header element, got %+v", m.Header) } } } func TestJetStreamSuperClusterStatszActiveServers(t *testing.T) { sc := createJetStreamSuperCluster(t, 2, 2) defer sc.shutdown() checkActive := func(expected int) { t.Helper() checkFor(t, 10*time.Second, 500*time.Millisecond, func() error { s := sc.randomServer() nc, err := nats.Connect(s.ClientURL(), nats.UserInfo("admin", "s3cr3t!")) if err != nil { t.Fatalf("Failed to create system client: %v", err) } defer nc.Close() resp, err := nc.Request(serverStatsPingReqSubj, nil, time.Second) if err != nil { t.Fatalf("Unexpected error: %v", err) } var ssm ServerStatsMsg if err := json.Unmarshal(resp.Data, &ssm); err != nil { t.Fatalf("Unexpected error: %v", err) } if ssm.Stats.ActiveServers != expected { return fmt.Errorf("Wanted %d, got %d", expected, ssm.Stats.ActiveServers) } return nil }) } checkActive(4) c := sc.randomCluster() ss := c.randomServer() ss.Shutdown() checkActive(3) c.restartServer(ss) checkActive(4) } func TestJetStreamSuperClusterSourceAndMirrorConsumersLeaderChange(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 2) defer sc.shutdown() c1 := sc.clusterForName("C1") c2 := sc.clusterForName("C2") nc, js := jsClientConnect(t, c1.randomServer()) defer nc.Close() var sources []*nats.StreamSource numStreams := 10 for i := 1; i <= numStreams; i++ { name := fmt.Sprintf("O%d", i) sources = append(sources, &nats.StreamSource{Name: name}) if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil { t.Fatalf("Unexpected error: %v", err) } } // Place our new stream that will source all the others in different cluster. nc, js = jsClientConnect(t, c2.randomServer()) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "S", Replicas: 2, Sources: sources, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } // Force leader change twice. nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "S"), nil, time.Second) c2.waitOnStreamLeader("$G", "S") nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "S"), nil, time.Second) c2.waitOnStreamLeader("$G", "S") // Now make sure we only have a single direct consumer on our origin streams. // Pick one at random. 
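// Sourcing creates an internal (direct) consumer on each origin stream; the stream leader changes forced
// above should not leave stale duplicates behind, so each origin should report exactly one.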
name := fmt.Sprintf("O%d", rand.Intn(numStreams-1)+1) c1.waitOnStreamLeader("$G", name) s := c1.streamLeader("$G", name) a, err := s.lookupAccount("$G") if err != nil { t.Fatalf("Unexpected error: %v", err) } mset, err := a.lookupStream(name) if err != nil { t.Fatalf("Unexpected error: %v", err) } checkFor(t, 10*time.Second, 250*time.Millisecond, func() error { if ndc := mset.numDirectConsumers(); ndc != 1 { return fmt.Errorf("Stream %q wanted 1 direct consumer, got %d", name, ndc) } return nil }) // Now create a mirror of selected from above. Will test same scenario. _, err = js.AddStream(&nats.StreamConfig{ Name: "M", Replicas: 2, Mirror: &nats.StreamSource{Name: name}, }) if err != nil { t.Fatalf("Unexpected error: %v", err) } // Force leader change twice. nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "M"), nil, time.Second) c2.waitOnStreamLeader("$G", "M") nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "M"), nil, time.Second) c2.waitOnStreamLeader("$G", "M") checkFor(t, 10*time.Second, 250*time.Millisecond, func() error { if ndc := mset.numDirectConsumers(); ndc != 2 { return fmt.Errorf("Stream %q wanted 2 direct consumers, got %d", name, ndc) } return nil }) } func TestJetStreamSuperClusterPushConsumerInterest(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 2) defer sc.shutdown() for _, test := range []struct { name string queue string }{ {"non queue", _EMPTY_}, {"queue", "queue"}, } { t.Run(test.name, func(t *testing.T) { testInterest := func(s *Server) { t.Helper() nc, js := jsClientConnect(t, s) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo"}, Replicas: 3, }) require_NoError(t, err) var sub *nats.Subscription if test.queue != _EMPTY_ { sub, err = js.QueueSubscribeSync("foo", test.queue) } else { sub, err = js.SubscribeSync("foo", nats.Durable("dur")) } require_NoError(t, err) js.Publish("foo", []byte("msg1")) // Since the GW watcher is checking every 1sec, make sure we are // giving it enough time for the delivery to start. _, err = sub.NextMsg(2 * time.Second) require_NoError(t, err) } // Create the durable push consumer from cluster "0" testInterest(sc.clusters[0].servers[0]) // Now "move" to a server in cluster "1" testInterest(sc.clusters[1].servers[0]) }) } } func TestJetStreamSuperClusterOverflowPlacement(t *testing.T) { sc := createJetStreamSuperClusterWithTemplate(t, jsClusterMaxBytesTempl, 3, 3) defer sc.shutdown() pcn := "C2" s := sc.clusterForName(pcn).randomServer() nc, js := jsClientConnect(t, s) defer nc.Close() // With this setup, we opted in for requiring MaxBytes, so this should error. _, err := js.AddStream(&nats.StreamConfig{ Name: "foo", Replicas: 3, }) require_Error(t, err, NewJSStreamMaxBytesRequiredError()) // R=2 on purpose to leave one server empty. _, err = js.AddStream(&nats.StreamConfig{ Name: "foo", Replicas: 2, MaxBytes: 2 * 1024 * 1024 * 1024, }) require_NoError(t, err) // Now try to add another that will overflow the current cluster's reservation. // Since we asked explicitly for the same cluster this should fail. // Note this will not be testing the peer picker since the update has probably not made it to the meta leader. _, err = js.AddStream(&nats.StreamConfig{ Name: "bar", Replicas: 3, MaxBytes: 2 * 1024 * 1024 * 1024, Placement: &nats.Placement{Cluster: pcn}, }) require_Contains(t, err.Error(), "nats: no suitable peers for placement") // Now test actual overflow placement. So try again with no placement designation. 
// This will test the peer picker's logic since they are updated at this point and the meta leader // knows it can not place it in C2. si, err := js.AddStream(&nats.StreamConfig{ Name: "bar", Replicas: 3, MaxBytes: 2 * 1024 * 1024 * 1024, }) require_NoError(t, err) // Make sure we did not get placed into C2. falt := si.Cluster.Name if falt == pcn { t.Fatalf("Expected to be placed in another cluster besides %q, but got %q", pcn, falt) } // One more time, that should spill over again to our last cluster. si, err = js.AddStream(&nats.StreamConfig{ Name: "baz", Replicas: 3, MaxBytes: 2 * 1024 * 1024 * 1024, }) require_NoError(t, err) // Make sure we did not get placed into C2. if salt := si.Cluster.Name; salt == pcn || salt == falt { t.Fatalf("Expected to be placed in last cluster besides %q or %q, but got %q", pcn, falt, salt) } // Now place a stream of R1 into C2 which should have space. si, err = js.AddStream(&nats.StreamConfig{ Name: "dlc", MaxBytes: 2 * 1024 * 1024 * 1024, }) require_NoError(t, err) if si.Cluster.Name != pcn { t.Fatalf("Expected to be placed in our origin cluster %q, but got %q", pcn, si.Cluster.Name) } } func TestJetStreamSuperClusterConcurrentOverflow(t *testing.T) { sc := createJetStreamSuperClusterWithTemplate(t, jsClusterMaxBytesTempl, 3, 3) defer sc.shutdown() pcn := "C2" startCh := make(chan bool) var wg sync.WaitGroup var swg sync.WaitGroup start := func(name string) { defer wg.Done() s := sc.clusterForName(pcn).randomServer() nc, js := jsClientConnect(t, s) defer nc.Close() swg.Done() <-startCh _, err := js.AddStream(&nats.StreamConfig{ Name: name, Replicas: 3, MaxBytes: 2 * 1024 * 1024 * 1024, }) require_NoError(t, err) } wg.Add(2) swg.Add(2) go start("foo") go start("bar") swg.Wait() // Now start both at the same time. close(startCh) wg.Wait() } func TestJetStreamSuperClusterStreamTagPlacement(t *testing.T) { sc := createJetStreamTaggedSuperCluster(t) defer sc.shutdown() placeOK := func(connectCluster string, tags []string, expectedCluster string) { t.Helper() nc, js := jsClientConnect(t, sc.clusterForName(connectCluster).randomServer()) defer nc.Close() si, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo"}, Placement: &nats.Placement{Tags: tags}, }) require_NoError(t, err) if si.Cluster.Name != expectedCluster { t.Fatalf("Failed to place properly in %q, got %q", expectedCluster, si.Cluster.Name) } js.DeleteStream("TEST") } placeOK("C2", []string{"cloud:aws"}, "C1") placeOK("C2", []string{"country:jp"}, "C3") placeOK("C1", []string{"cloud:gcp", "country:uk"}, "C2") // Case should not matter. placeOK("C1", []string{"cloud:GCP", "country:UK"}, "C2") placeOK("C2", []string{"Cloud:AwS", "Country:uS"}, "C1") placeErr := func(connectCluster string, tags []string) { t.Helper() nc, js := jsClientConnect(t, sc.clusterForName(connectCluster).randomServer()) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo"}, Placement: &nats.Placement{Tags: tags}, }) require_Contains(t, err.Error(), "no suitable peers for placement", "tags not matched") require_Contains(t, err.Error(), tags...)
} placeErr("C1", []string{"cloud:GCP", "country:US"}) placeErr("C1", []string{"country:DN"}) placeErr("C1", []string{"cloud:DO"}) } func TestJetStreamSuperClusterRemovedPeersAndStreamsListAndDelete(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 3) defer sc.shutdown() pcn := "C2" sc.waitOnLeader() ml := sc.leader() if ml.ClusterName() == pcn { pcn = "C1" } // Client based API nc, js := jsClientConnect(t, ml) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "GONE", Replicas: 3, Placement: &nats.Placement{Cluster: pcn}, }) require_NoError(t, err) _, err = js.AddConsumer("GONE", &nats.ConsumerConfig{Durable: "dlc", AckPolicy: nats.AckExplicitPolicy}) require_NoError(t, err) _, err = js.AddStream(&nats.StreamConfig{ Name: "TEST", Replicas: 3, Placement: &nats.Placement{Cluster: ml.ClusterName()}, }) require_NoError(t, err) // Put messages in.. num := 100 for i := 0; i < num; i++ { js.PublishAsync("GONE", []byte("SLS")) js.PublishAsync("TEST", []byte("SLS")) } select { case <-js.PublishAsyncComplete(): case <-time.After(5 * time.Second): t.Fatalf("Did not receive completion signal") } c := sc.clusterForName(pcn) c.shutdown() // Grab Stream List.. start := time.Now() resp, err := nc.Request(JSApiStreamList, nil, 2*time.Second) require_NoError(t, err) if delta := time.Since(start); delta > 100*time.Millisecond { t.Fatalf("Stream list call took too long to return: %v", delta) } var list JSApiStreamListResponse err = json.Unmarshal(resp.Data, &list) require_NoError(t, err) if len(list.Missing) != 1 || list.Missing[0] != "GONE" { t.Fatalf("Wrong Missing: %+v", list) } // Check behavior of stream info as well. We want it to return the stream is offline and not just timeout. _, err = js.StreamInfo("GONE") // FIXME(dlc) - Go client not putting nats: prefix on for stream but does for consumer. require_Error(t, err, NewJSStreamOfflineError(), errors.New("nats: stream is offline")) // Same for Consumer start = time.Now() resp, err = nc.Request("$JS.API.CONSUMER.LIST.GONE", nil, 2*time.Second) require_NoError(t, err) if delta := time.Since(start); delta > 100*time.Millisecond { t.Fatalf("Consumer list call took too long to return: %v", delta) } var clist JSApiConsumerListResponse err = json.Unmarshal(resp.Data, &clist) require_NoError(t, err) if len(clist.Missing) != 1 || clist.Missing[0] != "dlc" { t.Fatalf("Wrong Missing: %+v", clist) } _, err = js.ConsumerInfo("GONE", "dlc") require_Error(t, err, NewJSConsumerOfflineError(), errors.New("nats: consumer is offline")) // Make sure delete works. err = js.DeleteConsumer("GONE", "dlc") require_NoError(t, err) err = js.DeleteStream("GONE") require_NoError(t, err) // Test it is really gone. _, err = js.StreamInfo("GONE") require_Error(t, err, nats.ErrStreamNotFound) } func TestJetStreamSuperClusterConsumerDeliverNewBug(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 3) defer sc.shutdown() pcn := "C2" sc.waitOnLeader() ml := sc.leader() if ml.ClusterName() == pcn { pcn = "C1" } // Client based API nc, js := jsClientConnect(t, ml) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "T", Replicas: 3, Placement: &nats.Placement{Cluster: pcn}, }) require_NoError(t, err) // Put messages in.. 
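// With DeliverPolicy "new" the consumer should start after the last stream sequence, so once the 100
// messages below are published, Delivered.Stream should sit at 100 with Delivered.Consumer at 0 and
// NumPending at 0, and that state should survive wiping and restarting every server hosting the stream.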
num := 100 for i := 0; i < num; i++ { js.PublishAsync("T", []byte("OK")) } select { case <-js.PublishAsyncComplete(): case <-time.After(5 * time.Second): t.Fatalf("Did not receive completion signal") } ci, err := js.AddConsumer("T", &nats.ConsumerConfig{ Durable: "d", AckPolicy: nats.AckExplicitPolicy, DeliverPolicy: nats.DeliverNewPolicy, }) require_NoError(t, err) if ci.Delivered.Consumer != 0 || ci.Delivered.Stream != 100 { t.Fatalf("Incorrect consumer delivered info: %+v", ci.Delivered) } c := sc.clusterForName(pcn) for _, s := range c.servers { sd := s.JetStreamConfig().StoreDir s.Shutdown() removeDir(t, sd) s = c.restartServer(s) c.waitOnServerHealthz(s) c.waitOnConsumerLeader("$G", "T", "d") } c.waitOnConsumerLeader("$G", "T", "d") ci, err = js.ConsumerInfo("T", "d") require_NoError(t, err) if ci.Delivered.Consumer != 0 || ci.Delivered.Stream != 100 { t.Fatalf("Incorrect consumer delivered info: %+v", ci.Delivered) } if ci.NumPending != 0 { t.Fatalf("Did not expect NumPending, got %d", ci.NumPending) } } // This will test our ability to move streams and consumers between clusters. func TestJetStreamSuperClusterMovingStreamsAndConsumers(t *testing.T) { sc := createJetStreamTaggedSuperCluster(t) defer sc.shutdown() nc, js := jsClientConnect(t, sc.randomServer()) defer nc.Close() for _, test := range []struct { name string replicas int }{ {"R1", 1}, {"R3", 3}, } { t.Run(test.name, func(t *testing.T) { replicas := test.replicas si, err := js.AddStream(&nats.StreamConfig{ Name: "MOVE", Replicas: replicas, Placement: &nats.Placement{Tags: []string{"cloud:aws"}}, }) require_NoError(t, err) defer js.DeleteStream("MOVE") if si.Cluster.Name != "C1" { t.Fatalf("Failed to place properly in %q, got %q", "C1", si.Cluster.Name) } for i := 0; i < 1000; i++ { _, err := js.PublishAsync("MOVE", []byte("Moving on up")) require_NoError(t, err) } select { case <-js.PublishAsyncComplete(): case <-time.After(5 * time.Second): t.Fatalf("Did not receive completion signal") } // Durable Push Consumer, so same R. dpushSub, err := js.SubscribeSync("MOVE", nats.Durable("dlc")) require_NoError(t, err) defer dpushSub.Unsubscribe() // Ephemeral Push Consumer, R1. epushSub, err := js.SubscribeSync("MOVE") require_NoError(t, err) defer epushSub.Unsubscribe() // Durable Pull Consumer, so same R. dpullSub, err := js.PullSubscribe("MOVE", "dlc-pull") require_NoError(t, err) defer dpullSub.Unsubscribe() // TODO(dlc) - Server supports ephemeral pulls but Go client does not yet. si, err = js.StreamInfo("MOVE") require_NoError(t, err) if si.State.Consumers != 3 { t.Fatalf("Expected 3 attached consumers, got %d", si.State.Consumers) } initialState := si.State checkSubsPending(t, dpushSub, int(initialState.Msgs)) checkSubsPending(t, epushSub, int(initialState.Msgs)) // Ack 100 toAck := 100 for i := 0; i < toAck; i++ { m, err := dpushSub.NextMsg(time.Second) require_NoError(t, err) m.AckSync() // Ephemeral m, err = epushSub.NextMsg(time.Second) require_NoError(t, err) m.AckSync() } // Do same with pull subscriber. for _, m := range fetchMsgs(t, dpullSub, toAck, 5*time.Second) { m.AckSync() } // First make sure we disallow move and replica changes in same update. _, err = js.UpdateStream(&nats.StreamConfig{ Name: "MOVE", Placement: &nats.Placement{Tags: []string{"cloud:gcp"}}, Replicas: replicas + 1, }) require_Error(t, err, NewJSStreamMoveAndScaleError()) // Now move to new cluster. 
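// The move is requested by updating the placement tags while keeping the replica count unchanged. While it is
// in flight the stream temporarily reports peers from both clusters (up to 2x replicas) before shrinking back
// to the original count in the target cluster; the checks below walk through those phases.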
si, err = js.UpdateStream(&nats.StreamConfig{ Name: "MOVE", Replicas: replicas, Placement: &nats.Placement{Tags: []string{"cloud:gcp"}}, }) require_NoError(t, err) if si.Cluster.Name != "C1" { t.Fatalf("Expected cluster of %q but got %q", "C1", si.Cluster.Name) } // Make sure we can not move an inflight stream and consumers, should error. _, err = js.UpdateStream(&nats.StreamConfig{ Name: "MOVE", Replicas: replicas, Placement: &nats.Placement{Tags: []string{"cloud:aws"}}, }) require_Contains(t, err.Error(), "stream move already in progress") checkFor(t, 10*time.Second, 10*time.Millisecond, func() error { si, err := js.StreamInfo("MOVE") if err != nil { return err } // We should see 2X peers. numPeers := len(si.Cluster.Replicas) if si.Cluster.Leader != _EMPTY_ { numPeers++ } if numPeers != 2*replicas { // The move can happen very quick now, so we might already be done. if si.Cluster.Name == "C2" { return nil } return fmt.Errorf("Expected to see %d replicas, got %d", 2*replicas, numPeers) } return nil }) // Expect a new leader to emerge and replicas to drop as a leader is elected. // We have to check fast or it might complete and we will not see intermediate steps. sc.waitOnStreamLeader("$G", "MOVE") checkFor(t, 10*time.Second, 10*time.Millisecond, func() error { si, err := js.StreamInfo("MOVE") if err != nil { return err } if len(si.Cluster.Replicas) >= 2*replicas { return fmt.Errorf("Expected <%d replicas, got %d", 2*replicas, len(si.Cluster.Replicas)) } return nil }) // Should see the cluster designation and leader switch to C2. // We should also shrink back down to original replica count. checkFor(t, 20*time.Second, 100*time.Millisecond, func() error { si, err := js.StreamInfo("MOVE") if err != nil { return err } if si.Cluster.Name != "C2" { return fmt.Errorf("Wrong cluster: %q", si.Cluster.Name) } if si.Cluster.Leader == _EMPTY_ { return fmt.Errorf("No leader yet") } else if !strings.HasPrefix(si.Cluster.Leader, "C2-") { return fmt.Errorf("Wrong leader: %q", si.Cluster.Leader) } // Now we want to see that we shrink back to original. if len(si.Cluster.Replicas) != replicas-1 { return fmt.Errorf("Expected %d replicas, got %d", replicas-1, len(si.Cluster.Replicas)) } return nil }) // Check moved state is same as initial state. si, err = js.StreamInfo("MOVE") require_NoError(t, err) if !reflect.DeepEqual(si.State, initialState) { t.Fatalf("States do not match after migration:\n%+v\nvs\n%+v", si.State, initialState) } // Make sure we can still send messages. addN := toAck for i := 0; i < addN; i++ { _, err := js.Publish("MOVE", []byte("Done Moved")) require_NoError(t, err) } si, err = js.StreamInfo("MOVE") require_NoError(t, err) expectedPushMsgs := initialState.Msgs + uint64(addN) expectedPullMsgs := uint64(addN) if si.State.Msgs != expectedPushMsgs { t.Fatalf("Expected to be able to send new messages") } // Now check consumers, make sure the state is correct and that they transferred state and reflect the new messages. // We Ack'd 100 and sent another 100, so should be same. 
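// Push consumers should show Delivered equal to the original messages plus the 100 new ones, while the
// pull consumer only fetched (and acked) 100, so its Delivered stays at 100; both should have an AckFloor of 100.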
checkConsumer := func(sub *nats.Subscription, isPull bool) { t.Helper() checkFor(t, 10*time.Second, 100*time.Millisecond, func() error { ci, err := sub.ConsumerInfo() if err != nil { return err } var expectedMsgs uint64 if isPull { expectedMsgs = expectedPullMsgs } else { expectedMsgs = expectedPushMsgs } if ci.Delivered.Consumer != expectedMsgs || ci.Delivered.Stream != expectedMsgs { return fmt.Errorf("Delivered for %q is not correct: %+v", ci.Name, ci.Delivered) } if ci.AckFloor.Consumer != uint64(toAck) || ci.AckFloor.Stream != uint64(toAck) { return fmt.Errorf("AckFloor for %q is not correct: %+v", ci.Name, ci.AckFloor) } if isPull && ci.NumAckPending != 0 { return fmt.Errorf("NumAckPending for %q is not correct: %v", ci.Name, ci.NumAckPending) } else if !isPull && ci.NumAckPending != int(initialState.Msgs) { return fmt.Errorf("NumAckPending for %q is not correct: %v", ci.Name, ci.NumAckPending) } // Make sure the replicas etc are back to what is expected. si, err := js.StreamInfo("MOVE") if err != nil { return err } numExpected := si.Config.Replicas if ci.Config.Durable == _EMPTY_ { numExpected = 1 } numPeers := len(ci.Cluster.Replicas) if ci.Cluster.Leader != _EMPTY_ { numPeers++ } if numPeers != numExpected { return fmt.Errorf("Expected %d peers, got %d", numExpected, numPeers) } // If we are push check sub pending. if !isPull { checkSubsPending(t, sub, int(expectedPushMsgs)-toAck) } return nil }) } checkPushConsumer := func(sub *nats.Subscription) { t.Helper() checkConsumer(sub, false) } checkPullConsumer := func(sub *nats.Subscription) { t.Helper() checkConsumer(sub, true) } checkPushConsumer(dpushSub) checkPushConsumer(epushSub) checkPullConsumer(dpullSub) // Cleanup err = js.DeleteStream("MOVE") require_NoError(t, err) }) } } func TestJetStreamSuperClusterMovingStreamsWithMirror(t *testing.T) { sc := createJetStreamTaggedSuperCluster(t) defer sc.shutdown() nc, js := jsClientConnect(t, sc.randomServer()) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "SOURCE", Subjects: []string{"foo", "bar"}, Replicas: 3, Placement: &nats.Placement{Tags: []string{"cloud:aws"}}, }) require_NoError(t, err) _, err = js.AddStream(&nats.StreamConfig{ Name: "MIRROR", Replicas: 1, Mirror: &nats.StreamSource{Name: "SOURCE"}, Placement: &nats.Placement{Tags: []string{"cloud:gcp"}}, }) require_NoError(t, err) done := make(chan struct{}) exited := make(chan struct{}) errors := make(chan error, 1) numNoResp := uint64(0) // We will run a separate routine and send at 100hz go func() { nc, js := jsClientConnect(t, sc.randomServer()) defer nc.Close() defer close(exited) for { select { case <-done: return case <-time.After(10 * time.Millisecond): _, err := js.Publish("foo", []byte("100HZ")) if err == nil { } else if err == nats.ErrNoStreamResponse { atomic.AddUint64(&numNoResp, 1) continue } if err != nil { errors <- err return } } } }() // Let it get going. time.Sleep(500 * time.Millisecond) // Now move the source to a new cluster. 
_, err = js.UpdateStream(&nats.StreamConfig{ Name: "SOURCE", Subjects: []string{"foo", "bar"}, Replicas: 3, Placement: &nats.Placement{Tags: []string{"cloud:gcp"}}, }) require_NoError(t, err) checkFor(t, 30*time.Second, 100*time.Millisecond, func() error { si, err := js.StreamInfo("SOURCE") if err != nil { return err } if si.Cluster.Name != "C2" { return fmt.Errorf("Wrong cluster: %q", si.Cluster.Name) } if si.Cluster.Leader == _EMPTY_ { return fmt.Errorf("No leader yet") } else if !strings.HasPrefix(si.Cluster.Leader, "C2-") { return fmt.Errorf("Wrong leader: %q", si.Cluster.Leader) } // Now we want to see that we shrink back to original. if len(si.Cluster.Replicas) != 2 { return fmt.Errorf("Expected %d replicas, got %d", 2, len(si.Cluster.Replicas)) } // Let's get to 50+ msgs. if si.State.Msgs < 50 { return fmt.Errorf("Only see %d msgs", si.State.Msgs) } return nil }) close(done) <-exited if nnr := atomic.LoadUint64(&numNoResp); nnr > 0 { if nnr > 5 { t.Fatalf("Expected no or very few failed message publishes, got %d", nnr) } else { t.Logf("Got a few failed publishes: %d", nnr) } } checkFor(t, 20*time.Second, 100*time.Millisecond, func() error { si, err := js.StreamInfo("SOURCE") require_NoError(t, err) mi, err := js.StreamInfo("MIRROR") require_NoError(t, err) if !reflect.DeepEqual(si.State, mi.State) { return fmt.Errorf("Expected mirror to be the same, got %+v vs %+v", mi.State, si.State) } return nil }) } func TestJetStreamSuperClusterMovingStreamAndMoveBack(t *testing.T) { sc := createJetStreamTaggedSuperCluster(t) defer sc.shutdown() nc, js := jsClientConnect(t, sc.randomServer()) defer nc.Close() for _, test := range []struct { name string replicas int }{ {"R1", 1}, {"R3", 3}, } { t.Run(test.name, func(t *testing.T) { js.DeleteStream("TEST") _, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Replicas: test.replicas, Placement: &nats.Placement{Tags: []string{"cloud:aws"}}, }) require_NoError(t, err) toSend := 10_000 for i := 0; i < toSend; i++ { _, err := js.Publish("TEST", []byte("HELLO WORLD")) require_NoError(t, err) } _, err = js.UpdateStream(&nats.StreamConfig{ Name: "TEST", Replicas: test.replicas, Placement: &nats.Placement{Tags: []string{"cloud:gcp"}}, }) require_NoError(t, err) checkMove := func(cluster string) { t.Helper() sc.waitOnStreamLeader("$G", "TEST") checkFor(t, 20*time.Second, 100*time.Millisecond, func() error { si, err := js.StreamInfo("TEST") if err != nil { return err } if si.Cluster.Name != cluster { return fmt.Errorf("Wrong cluster: %q", si.Cluster.Name) } if si.Cluster.Leader == _EMPTY_ { return fmt.Errorf("No leader yet") } else if !strings.HasPrefix(si.Cluster.Leader, cluster) { return fmt.Errorf("Wrong leader: %q", si.Cluster.Leader) } // Now we want to see that we shrink back to original. 
if len(si.Cluster.Replicas) != test.replicas-1 { return fmt.Errorf("Expected %d replicas, got %d", test.replicas-1, len(si.Cluster.Replicas)) } if si.State.Msgs != uint64(toSend) { return fmt.Errorf("Only see %d msgs", si.State.Msgs) } return nil }) } checkMove("C2") _, err = js.UpdateStream(&nats.StreamConfig{ Name: "TEST", Replicas: test.replicas, Placement: &nats.Placement{Tags: []string{"cloud:aws"}}, }) require_NoError(t, err) checkMove("C1") }) } } func TestJetStreamSuperClusterImportConsumerStreamSubjectRemap(t *testing.T) { template := ` listen: 127.0.0.1:-1 server_name: %s jetstream: {max_mem_store: 256MB, max_file_store: 2GB, domain: HUB, store_dir: '%s'} cluster { name: %s listen: 127.0.0.1:%d routes = [%s] } accounts: { JS: { jetstream: enabled users: [ {user: js, password: pwd} ] exports [ # This is streaming to a delivery subject for a push based consumer. { stream: "deliver.ORDERS.*" } # This is to ack received messages. This is a service to support sync ack. { service: "$JS.ACK.ORDERS.*.>" } # To support ordered consumers, flow control. { service: "$JS.FC.>" } ] }, IM: { users: [ {user: im, password: pwd} ] imports [ { stream: { account: JS, subject: "deliver.ORDERS.route" }} { stream: { account: JS, subject: "deliver.ORDERS.gateway" }} { stream: { account: JS, subject: "deliver.ORDERS.leaf1" }} { stream: { account: JS, subject: "deliver.ORDERS.leaf2" }} { service: {account: JS, subject: "$JS.FC.>" }} ] }, $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] }, } leaf { listen: 127.0.0.1:-1 }` test := func(t *testing.T, queue bool) { c := createJetStreamSuperClusterWithTemplate(t, template, 3, 2) defer c.shutdown() s := c.randomServer() nc, js := jsClientConnect(t, s, nats.UserInfo("js", "pwd")) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "ORDERS", Subjects: []string{"foo"}, // The JS subject. 
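// Placed as R3 in C1; the four durables added below are then consumed across a route, a gateway and two leafnode setups.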
Replicas: 3, Placement: &nats.Placement{Cluster: "C1"}, }) require_NoError(t, err) _, err = js.Publish("foo", []byte("OK")) require_NoError(t, err) for dur, deliver := range map[string]string{ "dur-route": "deliver.ORDERS.route", "dur-gateway": "deliver.ORDERS.gateway", "dur-leaf-1": "deliver.ORDERS.leaf1", "dur-leaf-2": "deliver.ORDERS.leaf2", } { cfg := &nats.ConsumerConfig{ Durable: dur, DeliverSubject: deliver, AckPolicy: nats.AckExplicitPolicy, } if queue { cfg.DeliverGroup = "queue" } _, err = js.AddConsumer("ORDERS", cfg) require_NoError(t, err) } testCase := func(t *testing.T, s *Server, dSubj string) { nc2, err := nats.Connect(s.ClientURL(), nats.UserInfo("im", "pwd")) require_NoError(t, err) defer nc2.Close() var sub *nats.Subscription if queue { sub, err = nc2.QueueSubscribeSync(dSubj, "queue") } else { sub, err = nc2.SubscribeSync(dSubj) } require_NoError(t, err) m, err := sub.NextMsg(time.Second) require_NoError(t, err) if m.Subject != "foo" { t.Fatalf("Subject not mapped correctly across account boundary, expected %q got %q", "foo", m.Subject) } require_False(t, strings.Contains(m.Reply, "@")) } t.Run("route", func(t *testing.T) { // pick random non consumer leader so we receive via route s := c.clusterForName("C1").randomNonConsumerLeader("JS", "ORDERS", "dur-route") testCase(t, s, "deliver.ORDERS.route") }) t.Run("gateway", func(t *testing.T) { // pick server with inbound gateway from consumer leader, so we receive from gateway and have no route in between scl := c.clusterForName("C1").consumerLeader("JS", "ORDERS", "dur-gateway") var sfound *Server for _, s := range c.clusterForName("C2").servers { s.mu.Lock() for _, c := range s.gateway.in { if c.GetName() == scl.info.ID { sfound = s break } } s.mu.Unlock() if sfound != nil { break } } testCase(t, sfound, "deliver.ORDERS.gateway") }) t.Run("leaf-post-export", func(t *testing.T) { // create leaf node server connected post export/import scl := c.clusterForName("C1").consumerLeader("JS", "ORDERS", "dur-leaf-1") cf := createConfFile(t, []byte(fmt.Sprintf(` port: -1 leafnodes { remotes [ { url: "nats://im:pwd@127.0.0.1:%d" } ] } authorization: { user: im, password: pwd } `, scl.getOpts().LeafNode.Port))) s, _ := RunServerWithConfig(cf) defer s.Shutdown() checkLeafNodeConnected(t, scl) testCase(t, s, "deliver.ORDERS.leaf1") }) t.Run("leaf-pre-export", func(t *testing.T) { // create leaf node server connected pre export, perform export/import on leaf node server scl := c.clusterForName("C1").consumerLeader("JS", "ORDERS", "dur-leaf-2") cf := createConfFile(t, []byte(fmt.Sprintf(` port: -1 leafnodes { remotes [ { url: "nats://js:pwd@127.0.0.1:%d", account: JS2 } ] } accounts: { JS2: { users: [ {user: js, password: pwd} ] exports [ # This is streaming to a delivery subject for a push based consumer. { stream: "deliver.ORDERS.leaf2" } # This is to ack received messages. This is a service to support sync ack. { service: "$JS.ACK.ORDERS.*.>" } # To support ordered consumers, flow control. 
{ service: "$JS.FC.>" } ] }, IM2: { users: [ {user: im, password: pwd} ] imports [ { stream: { account: JS2, subject: "deliver.ORDERS.leaf2" }} { service: {account: JS2, subject: "$JS.FC.>" }} ] }, } `, scl.getOpts().LeafNode.Port))) s, _ := RunServerWithConfig(cf) defer s.Shutdown() checkLeafNodeConnected(t, scl) testCase(t, s, "deliver.ORDERS.leaf2") }) } t.Run("noQueue", func(t *testing.T) { test(t, false) }) t.Run("queue", func(t *testing.T) { test(t, true) }) } func TestJetStreamSuperClusterMaxHaAssets(t *testing.T) { sc := createJetStreamSuperClusterWithTemplateAndModHook(t, ` listen: 127.0.0.1:-1 server_name: %s jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s', limits: {max_ha_assets: 2}} cluster { name: %s listen: 127.0.0.1:%d routes = [%s] } accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } } `, 3, 2, func(serverName, clusterName, storeDir, conf string) string { return conf }, nil) defer sc.shutdown() // speed up statsz reporting for _, c := range sc.clusters { for _, s := range c.servers { s.mu.Lock() s.sys.statsz = 10 * time.Millisecond s.sys.cstatsz = s.sys.statsz s.sys.stmr.Reset(s.sys.statsz) s.mu.Unlock() } } nc, js := jsClientConnect(t, sc.randomServer()) defer nc.Close() ncSys := natsConnect(t, sc.randomServer().ClientURL(), nats.UserInfo("admin", "s3cr3t!")) defer ncSys.Close() statszSub, err := ncSys.SubscribeSync(fmt.Sprintf(serverStatsSubj, "*")) require_NoError(t, err) require_NoError(t, ncSys.Flush()) waitStatsz := func(peers, haassets int) { t.Helper() for peersWithExactHaAssets := 0; peersWithExactHaAssets < peers; { m, err := statszSub.NextMsg(time.Second) require_NoError(t, err) var statsz ServerStatsMsg err = json.Unmarshal(m.Data, &statsz) require_NoError(t, err) if statsz.Stats.JetStream == nil { continue } if haassets == statsz.Stats.JetStream.Stats.HAAssets { peersWithExactHaAssets++ } } } waitStatsz(6, 1) // counts _meta_ _, err = js.AddStream(&nats.StreamConfig{Name: "S0", Replicas: 1, Placement: &nats.Placement{Cluster: "C1"}}) require_NoError(t, err) waitStatsz(6, 1) _, err = js.AddStream(&nats.StreamConfig{Name: "S1", Replicas: 3, Placement: &nats.Placement{Cluster: "C1"}}) require_NoError(t, err) waitStatsz(3, 2) waitStatsz(3, 1) _, err = js.AddStream(&nats.StreamConfig{Name: "S2", Replicas: 3, Placement: &nats.Placement{Cluster: "C1"}}) require_NoError(t, err) waitStatsz(3, 3) waitStatsz(3, 1) _, err = js.AddStream(&nats.StreamConfig{Name: "S3", Replicas: 3, Placement: &nats.Placement{Cluster: "C1"}}) require_Error(t, err) require_Contains(t, err.Error(), "nats: no suitable peers for placement") require_Contains(t, err.Error(), "miscellaneous issue") require_NoError(t, js.DeleteStream("S1")) waitStatsz(3, 2) waitStatsz(3, 1) _, err = js.AddConsumer("S2", &nats.ConsumerConfig{Durable: "DUR1", AckPolicy: nats.AckExplicitPolicy}) require_NoError(t, err) waitStatsz(3, 3) waitStatsz(3, 1) _, err = js.AddConsumer("S2", &nats.ConsumerConfig{Durable: "DUR2", AckPolicy: nats.AckExplicitPolicy}) require_Error(t, err) require_Equal(t, err.Error(), "nats: insufficient resources") _, err = js.AddConsumer("S2", &nats.ConsumerConfig{AckPolicy: nats.AckExplicitPolicy}) require_NoError(t, err) waitStatsz(3, 3) waitStatsz(3, 1) _, err = js.UpdateStream(&nats.StreamConfig{Name: "S2", Replicas: 3, Description: "foobar"}) require_NoError(t, err) waitStatsz(3, 3) waitStatsz(3, 1) si, err := js.AddStream(&nats.StreamConfig{Name: "S4", Replicas: 3}) require_NoError(t, err) require_Equal(t, si.Cluster.Name, "C2") waitStatsz(3, 3) 
waitStatsz(3, 2) si, err = js.AddStream(&nats.StreamConfig{Name: "S5", Replicas: 3}) require_NoError(t, err) require_Equal(t, si.Cluster.Name, "C2") waitStatsz(6, 3) _, err = js.AddConsumer("S4", &nats.ConsumerConfig{Durable: "DUR2", AckPolicy: nats.AckExplicitPolicy}) require_Error(t, err) require_Equal(t, err.Error(), "nats: insufficient resources") _, err = js.UpdateStream(&nats.StreamConfig{Name: "S2", Replicas: 3, Placement: &nats.Placement{Cluster: "C2"}}) require_Error(t, err) require_Contains(t, err.Error(), "nats: no suitable peers for placement") require_Contains(t, err.Error(), "miscellaneous issue") } func TestJetStreamSuperClusterStreamAlternates(t *testing.T) { sc := createJetStreamTaggedSuperCluster(t) defer sc.shutdown() nc, js := jsClientConnect(t, sc.randomServer()) defer nc.Close() // C1 _, err := js.AddStream(&nats.StreamConfig{ Name: "SOURCE", Subjects: []string{"foo", "bar", "baz"}, Replicas: 3, Placement: &nats.Placement{Tags: []string{"cloud:aws", "country:us"}}, }) require_NoError(t, err) // C2 _, err = js.AddStream(&nats.StreamConfig{ Name: "MIRROR-1", Replicas: 1, Mirror: &nats.StreamSource{Name: "SOURCE"}, Placement: &nats.Placement{Tags: []string{"cloud:gcp", "country:uk"}}, }) require_NoError(t, err) // C3 _, err = js.AddStream(&nats.StreamConfig{ Name: "MIRROR-2", Replicas: 2, Mirror: &nats.StreamSource{Name: "SOURCE"}, Placement: &nats.Placement{Tags: []string{"cloud:az", "country:jp"}}, }) require_NoError(t, err) // No client support yet, so do by hand. getStreamInfo := func(nc *nats.Conn, expected string) { t.Helper() resp, err := nc.Request(fmt.Sprintf(JSApiStreamInfoT, "SOURCE"), nil, time.Second) require_NoError(t, err) var si StreamInfo err = json.Unmarshal(resp.Data, &si) require_NoError(t, err) require_True(t, len(si.Alternates) == 3) require_True(t, si.Alternates[0].Cluster == expected) seen := make(map[string]struct{}) for _, alt := range si.Alternates { seen[alt.Cluster] = struct{}{} } require_True(t, len(seen) == 3) } // Connect to different clusters to check ordering. nc, _ = jsClientConnect(t, sc.clusterForName("C1").randomServer()) defer nc.Close() getStreamInfo(nc, "C1") nc, _ = jsClientConnect(t, sc.clusterForName("C2").randomServer()) defer nc.Close() getStreamInfo(nc, "C2") nc, _ = jsClientConnect(t, sc.clusterForName("C3").randomServer()) defer nc.Close() getStreamInfo(nc, "C3") } // We had a scenario where a consumer would not recover properly on restart due to // the cluster state not being set properly when checking source subjects. func TestJetStreamSuperClusterStateOnRestartPreventsConsumerRecovery(t *testing.T) { sc := createJetStreamTaggedSuperCluster(t) defer sc.shutdown() nc, js := jsClientConnect(t, sc.randomServer()) defer nc.Close() // C1 _, err := js.AddStream(&nats.StreamConfig{ Name: "SOURCE", Subjects: []string{"foo", "bar"}, Replicas: 3, Placement: &nats.Placement{Tags: []string{"cloud:aws", "country:us"}}, }) require_NoError(t, err) // C2 _, err = js.AddStream(&nats.StreamConfig{ Name: "DS", Subjects: []string{"baz"}, Replicas: 3, Sources: []*nats.StreamSource{{Name: "SOURCE"}}, Placement: &nats.Placement{Tags: []string{"cloud:gcp", "country:uk"}}, }) require_NoError(t, err) // Bind to DS and match filter subject of SOURCE. _, err = js.AddConsumer("DS", &nats.ConsumerConfig{ Durable: "dlc", AckPolicy: nats.AckExplicitPolicy, FilterSubject: "foo", DeliverSubject: "d", }) require_NoError(t, err) // Send a few messages. 
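// The messages go into SOURCE on subject foo; DS sources SOURCE, so the filtered push consumer above (deliver subject "d") receives them.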
for i := 0; i < 100; i++ { _, err := js.Publish("foo", []byte("HELLO")) require_NoError(t, err) } sub := natsSubSync(t, nc, "d") natsNexMsg(t, sub, time.Second) c := sc.clusterForName("C2") cl := c.consumerLeader("$G", "DS", "dlc") // Pull source out from underneath the downstream stream. err = js.DeleteStream("SOURCE") require_NoError(t, err) cl.Shutdown() cl = c.restartServer(cl) c.waitOnServerHealthz(cl) // Now make sure the consumer is still on this server and has restarted properly. mset, err := cl.GlobalAccount().lookupStream("DS") require_NoError(t, err) if o := mset.lookupConsumer("dlc"); o == nil { t.Fatalf("Consumer was not properly restarted") } } // We allow mirrors to opt-in to direct get in a distributed queue group. func TestJetStreamSuperClusterStreamDirectGetMirrorQueueGroup(t *testing.T) { sc := createJetStreamTaggedSuperCluster(t) defer sc.shutdown() nc, js := jsClientConnect(t, sc.randomServer()) defer nc.Close() // C1 // Do by hand for now. cfg := &StreamConfig{ Name: "SOURCE", Subjects: []string{"kv.>"}, MaxMsgsPer: 1, Placement: &Placement{Tags: []string{"cloud:aws", "country:us"}}, AllowDirect: true, Replicas: 3, Storage: MemoryStorage, } addStream(t, nc, cfg) num := 100 for i := 0; i < num; i++ { js.PublishAsync(fmt.Sprintf("kv.%d", i), []byte("VAL")) } select { case <-js.PublishAsyncComplete(): case <-time.After(5 * time.Second): t.Fatalf("Did not receive completion signal") } // C2 cfg = &StreamConfig{ Name: "M1", Mirror: &StreamSource{Name: "SOURCE"}, Placement: &Placement{Tags: []string{"cloud:gcp", "country:uk"}}, MirrorDirect: true, Storage: MemoryStorage, } addStream(t, nc, cfg) // C3 (clustered) cfg = &StreamConfig{ Name: "M2", Mirror: &StreamSource{Name: "SOURCE"}, Replicas: 3, Placement: &Placement{Tags: []string{"country:jp"}}, MirrorDirect: true, Storage: MemoryStorage, } addStream(t, nc, cfg) checkFor(t, 5*time.Second, 100*time.Millisecond, func() error { si, err := js.StreamInfo("M2") require_NoError(t, err) if si.State.Msgs != uint64(num) { return fmt.Errorf("Expected %d msgs, got state: %d", num, si.State.Msgs) } return nil }) // Since last one was an R3, check and wait for the direct subscription. checkFor(t, 2*time.Second, 100*time.Millisecond, func() error { sl := sc.clusterForName("C3").streamLeader("$G", "M2") if mset, err := sl.GlobalAccount().lookupStream("M2"); err == nil { mset.mu.RLock() ok := mset.mirror.dsub != nil mset.mu.RUnlock() if ok { return nil } } return fmt.Errorf("No dsub yet") }) // Always do a direct get to the source, but check that we are getting answers from the mirrors when connected to their cluster. 
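// The direct get below asks for the last message on kv.22; with MaxMsgsPer: 1 that message is stored exactly once, at stream sequence 23 (kv.0 went in at sequence 1).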
getSubj := fmt.Sprintf(JSDirectMsgGetT, "SOURCE") req := []byte(`{"last_by_subj":"kv.22"}`) getMsg := func(c *nats.Conn) *nats.Msg { m, err := c.Request(getSubj, req, time.Second) require_NoError(t, err) require_True(t, string(m.Data) == "VAL") require_True(t, m.Header.Get(JSSequence) == "23") require_True(t, m.Header.Get(JSSubject) == "kv.22") return m } // C1 -> SOURCE nc, _ = jsClientConnect(t, sc.clusterForName("C1").randomServer()) defer nc.Close() m := getMsg(nc) require_True(t, m.Header.Get(JSStream) == "SOURCE") // C2 -> M1 nc, _ = jsClientConnect(t, sc.clusterForName("C2").randomServer()) defer nc.Close() m = getMsg(nc) require_True(t, m.Header.Get(JSStream) == "M1") // C3 -> M2 nc, _ = jsClientConnect(t, sc.clusterForName("C3").randomServer()) defer nc.Close() m = getMsg(nc) require_True(t, m.Header.Get(JSStream) == "M2") } func TestJetStreamSuperClusterTagInducedMoveCancel(t *testing.T) { server := map[string]struct{}{} sc := createJetStreamSuperClusterWithTemplateAndModHook(t, jsClusterTempl, 4, 2, func(serverName, clusterName, storeDir, conf string) string { server[serverName] = struct{}{} return fmt.Sprintf("%s\nserver_tags: [%s]", conf, clusterName) }, nil) defer sc.shutdown() // Client based API c := sc.randomCluster() srv := c.randomNonLeader() nc, js := jsClientConnect(t, srv) defer nc.Close() cfg := &nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo"}, Placement: &nats.Placement{Tags: []string{"C1"}}, Replicas: 3, } siCreate, err := js.AddStream(cfg) require_NoError(t, err) require_Equal(t, siCreate.Cluster.Name, "C1") toSend := uint64(1_000) for i := uint64(0); i < toSend; i++ { _, err = js.Publish("foo", nil) require_NoError(t, err) } ncsys, err := nats.Connect(srv.ClientURL(), nats.UserInfo("admin", "s3cr3t!")) require_NoError(t, err) defer ncsys.Close() // cause a move by altering placement tags cfg.Placement.Tags = []string{"C2"} _, err = js.UpdateStream(cfg) require_NoError(t, err) rmsg, err := ncsys.Request(fmt.Sprintf(JSApiServerStreamCancelMoveT, "$G", "TEST"), nil, 5*time.Second) require_NoError(t, err) var cancelResp JSApiStreamUpdateResponse require_NoError(t, json.Unmarshal(rmsg.Data, &cancelResp)) if cancelResp.Error != nil && ErrorIdentifier(cancelResp.Error.ErrCode) == JSStreamMoveNotInProgress { t.Skip("This can happen with delays, when Move completed before Cancel", cancelResp.Error) return } require_True(t, cancelResp.Error == nil) checkFor(t, 10*time.Second, 250*time.Millisecond, func() error { si, err := js.StreamInfo("TEST") require_NoError(t, err) if si.Config.Placement != nil { return fmt.Errorf("expected placement to be cleared got: %+v", si.Config.Placement) } return nil }) } func TestJetStreamSuperClusterMoveCancel(t *testing.T) { usageTickOld := usageTick usageTick = 250 * time.Millisecond defer func() { usageTick = usageTickOld }() server := map[string]struct{}{} sc := createJetStreamSuperClusterWithTemplateAndModHook(t, jsClusterTempl, 4, 2, func(serverName, clusterName, storeDir, conf string) string { server[serverName] = struct{}{} return fmt.Sprintf("%s\nserver_tags: [%s]", conf, serverName) }, nil) defer sc.shutdown() // Client based API c := sc.randomCluster() srv := c.randomNonLeader() nc, js := jsClientConnect(t, srv) defer nc.Close() siCreate, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo"}, Replicas: 3, }) require_NoError(t, err) streamPeerSrv := []string{siCreate.Cluster.Leader, siCreate.Cluster.Replicas[0].Name, siCreate.Cluster.Replicas[1].Name} // determine empty server for _, s := range 
streamPeerSrv { delete(server, s) } // pick the leftover server in the same cluster as the other servers emptySrv := _EMPTY_ for s := range server { // server name is prefixed with cluster name if strings.HasPrefix(s, c.name) { emptySrv = s break } } expectedPeers := map[string]struct{}{ getHash(streamPeerSrv[0]): {}, getHash(streamPeerSrv[1]): {}, getHash(streamPeerSrv[2]): {}, } _, err = js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "DUR", AckPolicy: nats.AckExplicitPolicy}) require_NoError(t, err) ci, err := js.AddConsumer("TEST", &nats.ConsumerConfig{InactiveThreshold: time.Hour, AckPolicy: nats.AckExplicitPolicy}) require_NoError(t, err) ephName := ci.Name toSend := uint64(1_000) for i := uint64(0); i < toSend; i++ { _, err = js.Publish("foo", nil) require_NoError(t, err) } serverEmpty := func(fromSrv string) error { if jszAfter, err := c.serverByName(fromSrv).Jsz(nil); err != nil { return fmt.Errorf("could not fetch JS info for server: %v", err) } else if jszAfter.Streams != 0 { return fmt.Errorf("empty server still has %d streams", jszAfter.Streams) } else if jszAfter.Consumers != 0 { return fmt.Errorf("empty server still has %d consumers", jszAfter.Consumers) } else if jszAfter.Bytes != 0 { return fmt.Errorf("empty server still has %d storage", jszAfter.Bytes) } return nil } checkSrvInvariant := func(s *Server, expectedPeers map[string]struct{}) error { js, cc := s.getJetStreamCluster() js.mu.Lock() defer js.mu.Unlock() if sa, ok := cc.streams["$G"]["TEST"]; !ok { return fmt.Errorf("stream not found") } else if len(sa.Group.Peers) != len(expectedPeers) { return fmt.Errorf("stream peer group size not %d, but %d", len(expectedPeers), len(sa.Group.Peers)) } else if da, ok := sa.consumers["DUR"]; !ok { return fmt.Errorf("durable not found") } else if len(da.Group.Peers) != len(expectedPeers) { return fmt.Errorf("durable peer group size not %d, but %d", len(expectedPeers), len(da.Group.Peers)) } else if ea, ok := sa.consumers[ephName]; !ok { return fmt.Errorf("ephemeral not found") } else if len(ea.Group.Peers) != 1 { return fmt.Errorf("ephemeral peer group size not 1, but %d", len(ea.Group.Peers)) } else if _, ok := expectedPeers[ea.Group.Peers[0]]; !ok { return fmt.Errorf("ephemeral peer not an expected peer") } else { for _, p := range sa.Group.Peers { if _, ok := expectedPeers[p]; !ok { return fmt.Errorf("peer not expected") } found := false for _, dp := range da.Group.Peers { if p == dp { found = true break } } if !found { t.Logf("durable peer group does not match stream peer group") } } } return nil } ncsys, err := nats.Connect(srv.ClientURL(), nats.UserInfo("admin", "s3cr3t!")) require_NoError(t, err) defer ncsys.Close() time.Sleep(2 * usageTick) aiBefore, err := js.AccountInfo() require_NoError(t, err) for _, moveFromSrv := range streamPeerSrv { moveReq, err := json.Marshal(&JSApiMetaServerStreamMoveRequest{Server: moveFromSrv, Tags: []string{emptySrv}}) require_NoError(t, err) rmsg, err := ncsys.Request(fmt.Sprintf(JSApiServerStreamMoveT, "$G", "TEST"), moveReq, 5*time.Second) require_NoError(t, err) var moveResp JSApiStreamUpdateResponse require_NoError(t, json.Unmarshal(rmsg.Data, &moveResp)) require_True(t, moveResp.Error == nil) rmsg, err = ncsys.Request(fmt.Sprintf(JSApiServerStreamCancelMoveT, "$G", "TEST"), nil, 5*time.Second) require_NoError(t, err) var cancelResp JSApiStreamUpdateResponse require_NoError(t, json.Unmarshal(rmsg.Data, &cancelResp)) if cancelResp.Error != nil && ErrorIdentifier(cancelResp.Error.ErrCode) == JSStreamMoveNotInProgress { t.Skip("This can happen
with delays, when Move completed before Cancel", cancelResp.Error) return } require_True(t, cancelResp.Error == nil) for _, sExpected := range streamPeerSrv { s := c.serverByName(sExpected) require_True(t, s.JetStreamIsStreamAssigned("$G", "TEST")) checkFor(t, 20*time.Second, 100*time.Millisecond, func() error { return checkSrvInvariant(s, expectedPeers) }) } checkFor(t, 10*time.Second, 100*time.Millisecond, func() error { return serverEmpty(emptySrv) }) checkFor(t, 3*usageTick, 100*time.Millisecond, func() error { if aiAfter, err := js.AccountInfo(); err != nil { return err } else if aiAfter.Store != aiBefore.Store { return fmt.Errorf("store before %d and after %d don't match", aiBefore.Store, aiAfter.Store) } else { return nil } }) } } func TestJetStreamSuperClusterDoubleStreamMove(t *testing.T) { server := map[string]struct{}{} sc := createJetStreamSuperClusterWithTemplateAndModHook(t, jsClusterTempl, 4, 2, func(serverName, clusterName, storeDir, conf string) string { server[serverName] = struct{}{} return fmt.Sprintf("%s\nserver_tags: [%s]", conf, serverName) }, nil) defer sc.shutdown() // Client based API c := sc.randomCluster() srv := c.randomNonLeader() nc, js := jsClientConnect(t, srv) defer nc.Close() siCreate, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo"}, Replicas: 3, }) require_NoError(t, err) srvMoveList := []string{siCreate.Cluster.Leader, siCreate.Cluster.Replicas[0].Name, siCreate.Cluster.Replicas[1].Name} // determine empty server for _, s := range srvMoveList { delete(server, s) } // pick left over server in same cluster as other server for s := range server { // server name is prefixed with cluster name if strings.HasPrefix(s, c.name) { srvMoveList = append(srvMoveList, s) break } } servers := []*Server{ c.serverByName(srvMoveList[0]), c.serverByName(srvMoveList[1]), c.serverByName(srvMoveList[2]), c.serverByName(srvMoveList[3]), // starts out empty } _, err = js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "DUR", AckPolicy: nats.AckExplicitPolicy}) require_NoError(t, err) ci, err := js.AddConsumer("TEST", &nats.ConsumerConfig{InactiveThreshold: time.Hour, AckPolicy: nats.AckExplicitPolicy}) require_NoError(t, err) ephName := ci.Name toSend := uint64(100) for i := uint64(0); i < toSend; i++ { _, err = js.Publish("foo", nil) require_NoError(t, err) } ncsys, err := nats.Connect(srv.ClientURL(), nats.UserInfo("admin", "s3cr3t!")) require_NoError(t, err) defer ncsys.Close() move := func(fromSrv string, toTags ...string) { sEmpty := c.serverByName(fromSrv) jszBefore, err := sEmpty.Jsz(nil) require_NoError(t, err) require_True(t, jszBefore.Streams == 1) moveReq, err := json.Marshal(&JSApiMetaServerStreamMoveRequest{ Server: fromSrv, Tags: toTags}) require_NoError(t, err) rmsg, err := ncsys.Request(fmt.Sprintf(JSApiServerStreamMoveT, "$G", "TEST"), moveReq, 100*time.Second) require_NoError(t, err) var moveResp JSApiStreamUpdateResponse require_NoError(t, json.Unmarshal(rmsg.Data, &moveResp)) require_True(t, moveResp.Error == nil) } serverEmpty := func(fromSrv string) error { if jszAfter, err := c.serverByName(fromSrv).Jsz(nil); err != nil { return fmt.Errorf("could not fetch JS info for server: %v", err) } else if jszAfter.Streams != 0 { return fmt.Errorf("empty server still has %d streams", jszAfter.Streams) } else if jszAfter.Consumers != 0 { return fmt.Errorf("empty server still has %d consumers", jszAfter.Consumers) } else if jszAfter.Store != 0 { return fmt.Errorf("empty server still has %d storage", jszAfter.Store) } return nil } 
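// moveComplete verifies that the stream, the durable and the ephemeral all ended up on the expected peers, that every server reports the same peer order, and that the messages have been fully copied to the target.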
moveComplete := func(toSrv string, expectedSet ...string) error { eSet := map[string]int{} foundInExpected := false for i, sExpected := range expectedSet { eSet[sExpected] = i s := c.serverByName(sExpected) if !s.JetStreamIsStreamAssigned("$G", "TEST") { return fmt.Errorf("expected stream to be assigned to %s", sExpected) } // test list order invariant js, cc := s.getJetStreamCluster() sExpHash := getHash(sExpected) js.mu.Lock() if sa, ok := cc.streams["$G"]["TEST"]; !ok { js.mu.Unlock() return fmt.Errorf("stream not found in cluster") } else if len(sa.Group.Peers) != 3 { js.mu.Unlock() return fmt.Errorf("peers not reset") } else if sa.Group.Peers[i] != sExpHash { js.mu.Unlock() return fmt.Errorf("stream: expected peer %s/%s on index %d, got %s", sExpHash, sExpected, i, sa.Group.Peers[i]) } else if ca, ok := sa.consumers["DUR"]; !ok { js.mu.Unlock() return fmt.Errorf("durable not found in stream") } else { found := false for _, peer := range ca.Group.Peers { if peer == sExpHash { found = true break } } if !found { js.mu.Unlock() return fmt.Errorf("consumer expected peer %s/%s but didn't find it in %+v", sExpHash, sExpected, ca.Group.Peers) } if ephA, ok := sa.consumers[ephName]; ok { if len(ephA.Group.Peers) != 1 { js.mu.Unlock() return fmt.Errorf("ephemeral peers not reset") } foundInExpected = foundInExpected || (ephA.Group.Peers[0] == cc.meta.ID()) } } js.mu.Unlock() } if len(expectedSet) > 0 && !foundInExpected { return fmt.Errorf("ephemeral peer not expected") } for _, s := range servers { if jszAfter, err := c.serverByName(toSrv).Jsz(nil); err != nil { return fmt.Errorf("could not fetch JS info for server: %v", err) } else if jszAfter.Messages != toSend { return fmt.Errorf("messages not yet copied, got %d, expected %d", jszAfter.Messages, toSend) } nc, js := jsClientConnect(t, s) defer nc.Close() if si, err := js.StreamInfo("TEST", nats.MaxWait(time.Second)); err != nil { return fmt.Errorf("could not fetch stream info: %v", err) } else if len(si.Cluster.Replicas)+1 != si.Config.Replicas { return fmt.Errorf("not yet downsized, still %d replicas (leader %s)", len(si.Cluster.Replicas), si.Cluster.Leader) } else if si.Cluster.Leader == _EMPTY_ { return fmt.Errorf("leader not found") } else if len(expectedSet) > 0 { if _, ok := eSet[si.Cluster.Leader]; !ok { return fmt.Errorf("leader %s not in expected set %+v", si.Cluster.Leader, eSet) } else if _, ok := eSet[si.Cluster.Replicas[0].Name]; !ok { return fmt.Errorf("replica %s not in expected set %+v", si.Cluster.Replicas[0].Name, eSet) } else if _, ok := eSet[si.Cluster.Replicas[1].Name]; !ok { return fmt.Errorf("replica %s not in expected set %+v", si.Cluster.Replicas[1].Name, eSet) } } nc.Close() } return nil } moveAndCheck := func(from, to string, expectedSet ...string) { t.Helper() move(from, to) checkFor(t, 40*time.Second, 100*time.Millisecond, func() error { return moveComplete(to, expectedSet...)
}) checkFor(t, 20*time.Second, 100*time.Millisecond, func() error { return serverEmpty(from) }) } checkFor(t, 20*time.Second, 1000*time.Millisecond, func() error { return serverEmpty(srvMoveList[3]) }) // first iteration establishes order of server 0-2 (the internal order in the server could be 1,0,2) moveAndCheck(srvMoveList[0], srvMoveList[3]) moveAndCheck(srvMoveList[1], srvMoveList[0]) moveAndCheck(srvMoveList[2], srvMoveList[1]) moveAndCheck(srvMoveList[3], srvMoveList[2], srvMoveList[0], srvMoveList[1], srvMoveList[2]) // second iteration iterates in order moveAndCheck(srvMoveList[0], srvMoveList[3], srvMoveList[1], srvMoveList[2], srvMoveList[3]) moveAndCheck(srvMoveList[1], srvMoveList[0], srvMoveList[2], srvMoveList[3], srvMoveList[0]) moveAndCheck(srvMoveList[2], srvMoveList[1], srvMoveList[3], srvMoveList[0], srvMoveList[1]) moveAndCheck(srvMoveList[3], srvMoveList[2], srvMoveList[0], srvMoveList[1], srvMoveList[2]) // iterate in the opposite direction and establish order 2-0 moveAndCheck(srvMoveList[2], srvMoveList[3], srvMoveList[0], srvMoveList[1], srvMoveList[3]) moveAndCheck(srvMoveList[1], srvMoveList[2], srvMoveList[0], srvMoveList[3], srvMoveList[2]) moveAndCheck(srvMoveList[0], srvMoveList[1], srvMoveList[3], srvMoveList[2], srvMoveList[1]) moveAndCheck(srvMoveList[3], srvMoveList[0], srvMoveList[2], srvMoveList[1], srvMoveList[0]) // move server in the middle of list moveAndCheck(srvMoveList[1], srvMoveList[3], srvMoveList[2], srvMoveList[0], srvMoveList[3]) moveAndCheck(srvMoveList[0], srvMoveList[1], srvMoveList[2], srvMoveList[3], srvMoveList[1]) moveAndCheck(srvMoveList[3], srvMoveList[0], srvMoveList[2], srvMoveList[1], srvMoveList[0]) // repeatedly use end moveAndCheck(srvMoveList[0], srvMoveList[3], srvMoveList[2], srvMoveList[1], srvMoveList[3]) moveAndCheck(srvMoveList[3], srvMoveList[0], srvMoveList[2], srvMoveList[1], srvMoveList[0]) moveAndCheck(srvMoveList[0], srvMoveList[3], srvMoveList[2], srvMoveList[1], srvMoveList[3]) moveAndCheck(srvMoveList[3], srvMoveList[0], srvMoveList[2], srvMoveList[1], srvMoveList[0]) } func TestJetStreamSuperClusterPeerEvacuationAndStreamReassignment(t *testing.T) { s := createJetStreamSuperClusterWithTemplateAndModHook(t, jsClusterTempl, 4, 2, func(serverName, clusterName, storeDir, conf string) string { return fmt.Sprintf("%s\nserver_tags: [cluster:%s, server:%s]", conf, clusterName, serverName) }, nil) defer s.shutdown() c := s.clusterForName("C1") // Client based API srv := c.randomNonLeader() nc, js := jsClientConnect(t, srv) defer nc.Close() test := func(t *testing.T, r int, moveTags []string, targetCluster string, testMigrateTo bool, listFrom bool) { si, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo"}, Replicas: r, }) require_NoError(t, err) defer js.DeleteStream("TEST") startSet := map[string]struct{}{ si.Cluster.Leader: {}, } for _, p := range si.Cluster.Replicas { startSet[p.Name] = struct{}{} } _, err = js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "DUR", AckPolicy: nats.AckExplicitPolicy}) require_NoError(t, err) sub, err := js.SubscribeSync("foo") require_NoError(t, err) for i := 0; i < 100; i++ { _, err = js.Publish("foo", nil) require_NoError(t, err) } toMoveFrom := si.Cluster.Leader if !listFrom { toMoveFrom = _EMPTY_ } sLdr := c.serverByName(si.Cluster.Leader) jszBefore, err := sLdr.Jsz(nil) require_NoError(t, err) require_True(t, jszBefore.Streams == 1) require_True(t, jszBefore.Consumers >= 1) require_True(t, jszBefore.Store != 0) migrateToServer := _EMPTY_ if 
testMigrateTo { // find an empty server for _, s := range c.servers { name := s.Name() found := si.Cluster.Leader == name if !found { for _, r := range si.Cluster.Replicas { if r.Name == name { found = true break } } } if !found { migrateToServer = name break } } jszAfter, err := c.serverByName(migrateToServer).Jsz(nil) require_NoError(t, err) require_True(t, jszAfter.Streams == 0) moveTags = append(moveTags, fmt.Sprintf("server:%s", migrateToServer)) } ncsys, err := nats.Connect(srv.ClientURL(), nats.UserInfo("admin", "s3cr3t!")) require_NoError(t, err) defer ncsys.Close() moveReq, err := json.Marshal(&JSApiMetaServerStreamMoveRequest{Server: toMoveFrom, Tags: moveTags}) require_NoError(t, err) rmsg, err := ncsys.Request(fmt.Sprintf(JSApiServerStreamMoveT, "$G", "TEST"), moveReq, 100*time.Second) require_NoError(t, err) var moveResp JSApiStreamUpdateResponse require_NoError(t, json.Unmarshal(rmsg.Data, &moveResp)) require_True(t, moveResp.Error == nil) // test move to particular server if testMigrateTo { toSrv := c.serverByName(migrateToServer) checkFor(t, 20*time.Second, 1000*time.Millisecond, func() error { jszAfter, err := toSrv.Jsz(nil) if err != nil { return fmt.Errorf("could not fetch JS info for server: %v", err) } if jszAfter.Streams != 1 { return fmt.Errorf("server expected to have one stream, has %d", jszAfter.Streams) } return nil }) } // Now wait until the stream is current. checkFor(t, 50*time.Second, 100*time.Millisecond, func() error { si, err := js.StreamInfo("TEST", nats.MaxWait(time.Second)) if err != nil { return fmt.Errorf("could not fetch stream info: %v", err) } if si.Cluster.Leader == toMoveFrom { return fmt.Errorf("peer not removed yet: %+v", toMoveFrom) } if si.Cluster.Leader == _EMPTY_ { return fmt.Errorf("no leader yet") } if len(si.Cluster.Replicas) != r-1 { return fmt.Errorf("not yet downsized, expected %d replicas, have %d", r-1, len(si.Cluster.Replicas)) } if si.Config.Replicas != r { return fmt.Errorf("bad replica count %d", si.Config.Replicas) } if si.Cluster.Name != targetCluster { return fmt.Errorf("stream expected in %s but found in %s", targetCluster, si.Cluster.Name) } sNew := s.serverByName(si.Cluster.Leader) if jszNew, err := sNew.Jsz(nil); err != nil { return err } else if jszNew.Streams != 1 { return fmt.Errorf("new leader has %d streams, not one", jszNew.Streams) } else if jszNew.Store != jszBefore.Store { return fmt.Errorf("new leader has %d storage, should have %d", jszNew.Store, jszBefore.Store) } return nil }) // test draining checkFor(t, 20*time.Second, time.Second, func() error { if !listFrom { // when needed, determine which server the move moved away from si, err := js.StreamInfo("TEST", nats.MaxWait(2*time.Second)) if err != nil { return fmt.Errorf("could not fetch stream info: %v", err) } for n := range startSet { if n != si.Cluster.Leader { var found bool for _, p := range si.Cluster.Replicas { if p.Name == n { found = true break } } if !found { toMoveFrom = n } } } } if toMoveFrom == _EMPTY_ { return fmt.Errorf("server to move away from not found") } sEmpty := c.serverByName(toMoveFrom) jszAfter, err := sEmpty.Jsz(nil) if err != nil { return fmt.Errorf("could not fetch JS info for server: %v", err) } if jszAfter.Streams != 0 { return fmt.Errorf("empty server still has %d streams", jszAfter.Streams) } if jszAfter.Consumers != 0 { return fmt.Errorf("empty server still has %d consumers", jszAfter.Consumers) } if jszAfter.Store != 0 { return fmt.Errorf("empty server still has %d storage", jszAfter.Store) } return nil }) // consume messages
from ephemeral consumer for i := 0; i < 100; i++ { _, err := sub.NextMsg(time.Second) require_NoError(t, err) } } for i := 1; i <= 3; i++ { t.Run(fmt.Sprintf("r%d", i), func(t *testing.T) { test(t, i, nil, "C1", false, true) }) t.Run(fmt.Sprintf("r%d-explicit", i), func(t *testing.T) { test(t, i, nil, "C1", true, true) }) t.Run(fmt.Sprintf("r%d-nosrc", i), func(t *testing.T) { test(t, i, nil, "C1", false, false) }) } t.Run("r3-cluster-move", func(t *testing.T) { test(t, 3, []string{"cluster:C2"}, "C2", false, false) }) t.Run("r3-cluster-move-nosrc", func(t *testing.T) { test(t, 3, []string{"cluster:C2"}, "C2", false, true) }) } func TestJetStreamSuperClusterMirrorInheritsAllowDirect(t *testing.T) { sc := createJetStreamTaggedSuperCluster(t) defer sc.shutdown() nc, js := jsClientConnect(t, sc.randomServer()) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "KV", Subjects: []string{"key.*"}, Placement: &nats.Placement{Tags: []string{"cloud:aws", "country:us"}}, MaxMsgsPerSubject: 1, AllowDirect: true, }) require_NoError(t, err) _, err = js.AddStream(&nats.StreamConfig{ Name: "M", Mirror: &nats.StreamSource{Name: "KV"}, Placement: &nats.Placement{Tags: []string{"cloud:gcp", "country:uk"}}, }) require_NoError(t, err) // Do direct grab for now. resp, err := nc.Request(fmt.Sprintf(JSApiStreamInfoT, "M"), nil, time.Second) require_NoError(t, err) var si StreamInfo err = json.Unmarshal(resp.Data, &si) require_NoError(t, err) if !si.Config.MirrorDirect { t.Fatalf("Expected MirrorDirect to be inherited as true") } } func TestJetStreamSuperClusterSystemLimitsPlacement(t *testing.T) { const largeSystemLimit = 1024 const smallSystemLimit = 512 tmpl := ` listen: 127.0.0.1:-1 server_name: %s jetstream: { max_mem_store: _MAXMEM_ max_file_store: _MAXFILE_ store_dir: '%s', } server_tags: [ _TAG_ ] leaf { listen: 127.0.0.1:-1 } cluster { name: %s listen: 127.0.0.1:%d routes = [%s] } accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } } ` storeCnf := func(serverName, clusterName, storeDir, conf string) string { switch { case strings.HasPrefix(serverName, "C1"): conf = strings.Replace(conf, "_MAXMEM_", fmt.Sprint(largeSystemLimit), 1) conf = strings.Replace(conf, "_MAXFILE_", fmt.Sprint(largeSystemLimit), 1) return strings.Replace(conf, "_TAG_", serverName, 1) case strings.HasPrefix(serverName, "C2"): conf = strings.Replace(conf, "_MAXMEM_", fmt.Sprint(smallSystemLimit), 1) conf = strings.Replace(conf, "_MAXFILE_", fmt.Sprint(smallSystemLimit), 1) return strings.Replace(conf, "_TAG_", serverName, 1) default: return conf } } sCluster := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 3, 2, storeCnf, nil) defer sCluster.shutdown() requestLeaderStepDown := func(clientURL string) error { nc, err := nats.Connect(clientURL) if err != nil { return err } defer nc.Close() ncResp, err := nc.Request(JSApiLeaderStepDown, nil, 3*time.Second) if err != nil { return err } var resp JSApiLeaderStepDownResponse if err := json.Unmarshal(ncResp.Data, &resp); err != nil { return err } if resp.Error != nil { return resp.Error } if !resp.Success { return fmt.Errorf("leader step down request not successful") } return nil } // Force large cluster to be leader var largeLeader *Server err := checkForErr(15*time.Second, 500*time.Millisecond, func() error { // Range over cluster A, which is the large cluster. 
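// clusters[0] is the first cluster (C1), which storeCnf configured with the large mem/file limits.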
servers := sCluster.clusters[0].servers for _, s := range servers { if s.JetStreamIsLeader() { largeLeader = s return nil } } if err := requestLeaderStepDown(servers[0].ClientURL()); err != nil { return fmt.Errorf("failed to request leader step down: %s", err) } return fmt.Errorf("leader is not in large cluster") }) if err != nil { t.Skipf("failed to get desired layout: %s", err) } getStreams := func(jsm nats.JetStreamManager) []string { var streams []string for s := range jsm.StreamNames() { streams = append(streams, s) } return streams } nc, js := jsClientConnect(t, largeLeader) defer nc.Close() cases := []struct { name string storage nats.StorageType createMaxBytes int64 serverTag string wantErr bool }{ { name: "file create large stream on small cluster b0", storage: nats.FileStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C2-S1", wantErr: true, }, { name: "memory create large stream on small cluster b0", storage: nats.MemoryStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C2-S1", wantErr: true, }, { name: "file create large stream on small cluster b1", storage: nats.FileStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C2-S2", wantErr: true, }, { name: "memory create large stream on small cluster b1", storage: nats.MemoryStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C2-S2", wantErr: true, }, { name: "file create large stream on small cluster b2", storage: nats.FileStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C2-S3", wantErr: true, }, { name: "memory create large stream on small cluster b2", storage: nats.MemoryStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C2-S3", wantErr: true, }, { name: "file create large stream on large cluster a0", storage: nats.FileStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C1-S1", }, { name: "memory create large stream on large cluster a0", storage: nats.MemoryStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C1-S1", }, { name: "file create large stream on large cluster a1", storage: nats.FileStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C1-S2", }, { name: "memory create large stream on large cluster a1", storage: nats.MemoryStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C1-S2", }, { name: "file create large stream on large cluster a2", storage: nats.FileStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C1-S3", }, { name: "memory create large stream on large cluster a2", storage: nats.MemoryStorage, createMaxBytes: smallSystemLimit + 1, serverTag: "C1-S3", }, } for i := 0; i < len(cases) && !t.Failed(); i++ { c := cases[i] t.Run(c.name, func(st *testing.T) { var clusterName string if strings.HasPrefix(c.serverTag, "a") { clusterName = "cluster-a" } else if strings.HasPrefix(c.serverTag, "b") { clusterName = "cluster-b" } if s := getStreams(js); len(s) != 0 { st.Fatalf("unexpected stream count, got=%d, want=0", len(s)) } streamName := fmt.Sprintf("TEST-%s", c.serverTag) si, err := js.AddStream(&nats.StreamConfig{ Name: streamName, Subjects: []string{"foo"}, Storage: c.storage, MaxBytes: c.createMaxBytes, Placement: &nats.Placement{ Cluster: clusterName, Tags: []string{c.serverTag}, }, }) if c.wantErr && err == nil { if s := getStreams(js); len(s) != 1 { st.Logf("unexpected stream count, got=%d, want=1, streams=%v", len(s), s) } cfg := si.Config st.Fatalf("unexpected success, maxBytes=%d, cluster=%s, tags=%v", cfg.MaxBytes, cfg.Placement.Cluster, cfg.Placement.Tags) } else if !c.wantErr && err != nil { if s := getStreams(js); len(s) != 0 
{ st.Logf("unexpected stream count, got=%d, want=0, streams=%v", len(s), s) } require_NoError(st, err) } if err == nil { if s := getStreams(js); len(s) != 1 { st.Fatalf("unexpected stream count, got=%d, want=1", len(s)) } } // Delete regardless. js.DeleteStream(streamName) }) } } func TestJetStreamSuperClusterMixedModeSwitchToInterestOnlyStaticConfig(t *testing.T) { tmpl := ` listen: 127.0.0.1:-1 server_name: %s jetstream: { domain: ngs, max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'} leaf: { listen: 127.0.0.1:-1 } cluster { name: %s listen: 127.0.0.1:%d routes = [%s] } accounts { ONE { users = [ { user: "one", pass: "pwd" } ] jetstream: enabled } TWO { users = [ { user: "two", pass: "pwd" } ] } $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } } ` sc := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 5, 3, func(serverName, clusterName, storeDir, conf string) string { sname := serverName[strings.Index(serverName, "-")+1:] switch sname { case "S4", "S5": conf = strings.ReplaceAll(conf, "jetstream: { ", "#jetstream: { ") default: conf = strings.ReplaceAll(conf, "leaf: { ", "#leaf: { ") } return conf }, nil) defer sc.shutdown() // Connect our client to a non JS server c := sc.randomCluster() var s *Server for _, as := range c.servers { if !as.JetStreamEnabled() { s = as break } } if s == nil { t.Fatal("Did not find a non JS server!") } nc, js := jsClientConnect(t, s, nats.UserInfo("one", "pwd")) defer nc.Close() // Just create a stream and then make sure that all gateways have switched // to interest-only mode. si, err := js.AddStream(&nats.StreamConfig{Name: "interest", Replicas: 3}) require_NoError(t, err) sc.waitOnStreamLeader("ONE", "interest") check := func(accName string) { t.Helper() for _, c := range sc.clusters { for _, s := range c.servers { // Check only JS servers outbound GW connections if !s.JetStreamEnabled() { continue } opts := s.getOpts() for _, gw := range opts.Gateway.Gateways { if gw.Name == opts.Gateway.Name { continue } checkGWInterestOnlyMode(t, s, gw.Name, accName) } } } } // Starting v2.9.0, all accounts should be switched to interest-only mode check("ONE") check("TWO") var gwsa [16]*client gws := gwsa[:0] s = sc.serverByName(si.Cluster.Leader) // Get the GW outbound connections s.getOutboundGatewayConnections(&gws) for _, gwc := range gws { gwc.mu.Lock() gwc.nc.Close() gwc.mu.Unlock() } waitForOutboundGateways(t, s, 2, 5*time.Second) check("ONE") check("TWO") } func TestJetStreamSuperClusterMixedModeSwitchToInterestOnlyOperatorConfig(t *testing.T) { kp, _ := nkeys.FromSeed(oSeed) skp, _ := nkeys.CreateAccount() spub, _ := skp.PublicKey() nac := jwt.NewAccountClaims(spub) sjwt, err := nac.Encode(kp) require_NoError(t, err) akp, _ := nkeys.CreateAccount() apub, _ := akp.PublicKey() nac = jwt.NewAccountClaims(apub) // Set some limits to enable JS. 
nac.Limits.JetStreamLimits.DiskStorage = 1024 * 1024 nac.Limits.JetStreamLimits.Streams = 10 ajwt, err := nac.Encode(kp) require_NoError(t, err) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if strings.HasSuffix(r.URL.Path, spub) { w.Write([]byte(sjwt)) } else { w.Write([]byte(ajwt)) } })) defer ts.Close() operator := fmt.Sprintf(` operator: %s resolver: URL("%s/ngs/v1/accounts/jwt/") `, ojwt, ts.URL) tmpl := ` listen: 127.0.0.1:-1 server_name: %s jetstream: { domain: ngs, max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'} leaf: { listen: 127.0.0.1:-1 } cluster { name: %s listen: 127.0.0.1:%d routes = [%s] } ` + operator sc := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 5, 3, func(serverName, clusterName, storeDir, conf string) string { conf = strings.ReplaceAll(conf, "system_account: \"$SYS\"", fmt.Sprintf("system_account: \"%s\"", spub)) sname := serverName[strings.Index(serverName, "-")+1:] switch sname { case "S4", "S5": conf = strings.ReplaceAll(conf, "jetstream: { ", "#jetstream: { ") default: conf = strings.ReplaceAll(conf, "leaf: { ", "#leaf: { ") } return conf }, nil) defer sc.shutdown() // Connect our client to a non JS server c := sc.randomCluster() var s *Server for _, as := range c.servers { if !as.JetStreamEnabled() { s = as break } } if s == nil { t.Fatal("Did not find a non JS server!") } nc, js := jsClientConnect(t, s, createUserCreds(t, nil, akp)) defer nc.Close() // Just create a stream and then make sure that all gateways have switched // to interest-only mode. si, err := js.AddStream(&nats.StreamConfig{Name: "interest", Replicas: 3}) require_NoError(t, err) sc.waitOnStreamLeader(apub, "interest") check := func(s *Server) { opts := s.getOpts() for _, gw := range opts.Gateway.Gateways { if gw.Name == opts.Gateway.Name { continue } checkGWInterestOnlyMode(t, s, gw.Name, apub) } } s = sc.serverByName(si.Cluster.Leader) check(s) // Let's cause a leadership change and verify that it still works. _, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "interest"), nil, time.Second) require_NoError(t, err) sc.waitOnStreamLeader(apub, "interest") si, err = js.StreamInfo("interest") require_NoError(t, err) s = sc.serverByName(si.Cluster.Leader) check(s) var gwsa [16]*client gws := gwsa[:0] // Get the GW outbound connections s.getOutboundGatewayConnections(&gws) for _, gwc := range gws { gwc.mu.Lock() gwc.nc.Close() gwc.mu.Unlock() } waitForOutboundGateways(t, s, 2, 5*time.Second) check(s) } type captureGWRewriteLogger struct { DummyLogger ch chan string } func (l *captureGWRewriteLogger) Tracef(format string, args ...interface{}) { msg := fmt.Sprintf(format, args...) 
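// Only capture trace lines for the snapshot ACK subject that carry the gateway reply prefix; the non-blocking send keeps Tracef from stalling once a match has been delivered.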
if strings.Contains(msg, "$JS.SNAPSHOT.ACK.TEST") && strings.Contains(msg, gwReplyPrefix) { select { case l.ch <- msg: default: } } } func TestJetStreamSuperClusterGWReplyRewrite(t *testing.T) { sc := createJetStreamSuperCluster(t, 3, 2) defer sc.shutdown() nc, js := jsClientConnect(t, sc.serverByName("C1-S1")) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", Subjects: []string{"foo"}, Replicas: 3, }) require_NoError(t, err) sc.waitOnStreamLeader(globalAccountName, "TEST") for i := 0; i < 10; i++ { sendStreamMsg(t, nc, "foo", "msg") } nc2, _ := jsClientConnect(t, sc.serverByName("C2-S2")) defer nc2.Close() s := sc.clusters[0].streamLeader(globalAccountName, "TEST") var gws []*client s.getOutboundGatewayConnections(&gws) for _, gw := range gws { gw.mu.Lock() gw.trace = true gw.mu.Unlock() } l := &captureGWRewriteLogger{ch: make(chan string, 1)} s.SetLogger(l, false, true) // Send a request through the gateway sreq := &JSApiStreamSnapshotRequest{ DeliverSubject: nats.NewInbox(), ChunkSize: 512, } natsSub(t, nc2, sreq.DeliverSubject, func(m *nats.Msg) { m.Respond(nil) }) natsFlush(t, nc2) req, _ := json.Marshal(sreq) rmsg, err := nc2.Request(fmt.Sprintf(JSApiStreamSnapshotT, "TEST"), req, time.Second) require_NoError(t, err) var resp JSApiStreamSnapshotResponse err = json.Unmarshal(rmsg.Data, &resp) require_NoError(t, err) if resp.Error != nil { t.Fatalf("Did not get correct error response: %+v", resp.Error) } // Now we just want to make sure that the reply has the gateway prefix select { case <-l.ch: case <-time.After(10 * time.Second): } } func TestJetStreamSuperClusterGWOfflineSatus(t *testing.T) { orgEventsHBInterval := eventsHBInterval eventsHBInterval = 500 * time.Millisecond //time.Second defer func() { eventsHBInterval = orgEventsHBInterval }() tmpl := ` listen: 127.0.0.1:-1 server_name: %s jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'} gateway { name: "local" listen: 127.0.0.1:-1 } cluster { name: %s listen: 127.0.0.1:%d routes = [%s] } accounts { SYS { users [{user: sys, password: pwd}] } ONE { jetstream: enabled users [{user: one, password: pwd}] } } system_account=SYS ` c := createJetStreamClusterWithTemplate(t, tmpl, "local", 3) defer c.shutdown() var gwURLs string for i, s := range c.servers { if i > 0 { gwURLs += "," } gwURLs += `"nats://` + s.GatewayAddr().String() + `"` } tmpl2 := ` listen: 127.0.0.1:-1 server_name: %s jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'} gateway { name: "remote" listen: 127.0.0.1:-1 __remote__ } cluster { name: %s listen: 127.0.0.1:%d routes = [%s] } accounts { SYS { users [{user: sys, password: pwd}] } ONE { jetstream: enabled users [{user: one, password: pwd}] } } system_account=SYS ` c2 := createJetStreamClusterAndModHook(t, tmpl2, "remote", "R", 2, 16022, false, func(serverName, clusterName, storeDir, conf string) string { conf = strings.Replace(conf, "__remote__", fmt.Sprintf("gateways [ { name: 'local', urls: [%s] } ]", gwURLs), 1) return conf }) defer c2.shutdown() for _, s := range c.servers { waitForOutboundGateways(t, s, 1, 2*time.Second) } for _, s := range c2.servers { waitForOutboundGateways(t, s, 1, 2*time.Second) } c.waitOnPeerCount(5) // Simulate going offline without sending shutdown protocol for _, s := range c2.servers { c := s.getOutboundGatewayConnection("local") c.setNoReconnect() c.mu.Lock() c.nc.Close() c.mu.Unlock() } c2.shutdown() checkFor(t, 10*time.Second, 100*time.Millisecond, func() error { var ok int for _, s := range c.servers { jsz, err := 
s.Jsz(nil) if err != nil { return err } for _, r := range jsz.Meta.Replicas { if r.Name == "RS-1" && r.Offline { ok++ } else if r.Name == "RS-2" && r.Offline { ok++ } } } if ok != 2 { return fmt.Errorf("RS-1 or RS-2 still marked as online") } return nil }) } func TestJetStreamSuperClusterMovingR1Stream(t *testing.T) { // Make C2 have some latency. gwm := gwProxyMap{ "C2": &gwProxy{ rtt: 10 * time.Millisecond, up: 1 * 1024 * 1024 * 1024, // 1gbit down: 1 * 1024 * 1024 * 1024, // 1gbit }, } sc := createJetStreamTaggedSuperClusterWithGWProxy(t, gwm) defer sc.shutdown() nc, js := jsClientConnect(t, sc.clusterForName("C1").randomServer()) defer nc.Close() _, err := js.AddStream(&nats.StreamConfig{ Name: "TEST", }) require_NoError(t, err) toSend := 10_000 for i := 0; i < toSend; i++ { _, err := js.PublishAsync("TEST", []byte("HELLO WORLD")) require_NoError(t, err) } select { case <-js.PublishAsyncComplete(): case <-time.After(5 * time.Second): t.Fatalf("Did not receive completion signal") } // Have it move to GCP. _, err = js.UpdateStream(&nats.StreamConfig{ Name: "TEST", Placement: &nats.Placement{Tags: []string{"cloud:gcp"}}, }) require_NoError(t, err) checkFor(t, 5*time.Second, 100*time.Millisecond, func() error { sc.waitOnStreamLeader(globalAccountName, "TEST") si, err := js.StreamInfo("TEST") if err != nil { return err } if si.Cluster.Name != "C2" { return fmt.Errorf("Wrong cluster: %q", si.Cluster.Name) } if si.Cluster.Leader == _EMPTY_ { return fmt.Errorf("No leader yet") } else if !strings.HasPrefix(si.Cluster.Leader, "C2") { return fmt.Errorf("Wrong leader: %q", si.Cluster.Leader) } // Now we want to see that we shrink back to original. if len(si.Cluster.Replicas) != 0 { return fmt.Errorf("Expected 0 replicas, got %d", len(si.Cluster.Replicas)) } if si.State.Msgs != uint64(toSend) { return fmt.Errorf("Only see %d msgs", si.State.Msgs) } return nil }) }
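// The helper below is an illustrative sketch only and is not referenced by the
// tests above; its name, exampleRequestStreamMove, is made up for this example.
// It condenses the pattern the move/cancel tests use inline: marshal a
// JSApiMetaServerStreamMoveRequest, send it on the per-stream move API subject
// over a system-account connection, and surface any API error carried in the
// JSApiStreamUpdateResponse. The tests above would invoke it roughly as
// exampleRequestStreamMove(ncsys, "$G", "TEST", toMoveFrom, moveTags), then
// poll StreamInfo/Jsz to observe the move completing.
func exampleRequestStreamMove(ncsys *nats.Conn, account, stream, fromServer string, tags []string) error {
	// Ask the meta leader to move the stream's assets off fromServer and onto
	// peers matching the given placement tags.
	moveReq, err := json.Marshal(&JSApiMetaServerStreamMoveRequest{Server: fromServer, Tags: tags})
	if err != nil {
		return err
	}
	rmsg, err := ncsys.Request(fmt.Sprintf(JSApiServerStreamMoveT, account, stream), moveReq, 5*time.Second)
	if err != nil {
		return err
	}
	// The move endpoint answers with a stream update response; a nil Error
	// means the move was accepted, not that it has already finished.
	var moveResp JSApiStreamUpdateResponse
	if err := json.Unmarshal(rmsg.Data, &moveResp); err != nil {
		return err
	}
	if moveResp.Error != nil {
		return moveResp.Error
	}
	return nil
}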