// Copyright 2020-2022 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
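// The legacy "// +build" line below mirrors the go:build constraint so that
// pre-Go-1.17 toolchains still honor these test-skipping tags.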
//go:build !skip_js_tests && !skip_js_cluster_tests && !skip_js_cluster_tests_2 && !skip_js_super_cluster_tests
// +build !skip_js_tests,!skip_js_cluster_tests,!skip_js_cluster_tests_2,!skip_js_super_cluster_tests

package server

import (
	"encoding/json"
	"errors"
	"fmt"
	"math/rand"
	"net/http"
	"net/http/httptest"
	"reflect"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/nats-io/jwt/v2"
	"github.com/nats-io/nats.go"
	"github.com/nats-io/nkeys"
)
func TestJetStreamSuperClusterMetaPlacement(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 3)
	defer sc.shutdown()

	// We want to influence where the meta leader will place itself when we ask the
	// current leader to step down.
	ml := sc.leader()
	cn := ml.ClusterName()
	var pcn string
	for _, c := range sc.clusters {
		if c.name != cn {
			pcn = c.name
			break
		}
	}

	// Client based API
	s := sc.randomServer()
	nc, err := nats.Connect(s.ClientURL(), nats.UserInfo("admin", "s3cr3t!"))
	if err != nil {
		t.Fatalf("Failed to create system client: %v", err)
	}
	defer nc.Close()

	stepdown := func(cn string) *JSApiLeaderStepDownResponse {
		req := &JSApiLeaderStepdownRequest{Placement: &Placement{Cluster: cn}}
		jreq, err := json.Marshal(req)
		if err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}

		resp, err := nc.Request(JSApiLeaderStepDown, jreq, time.Second)
		if err != nil {
			t.Fatalf("Error on stepdown request: %v", err)
		}
		var sdr JSApiLeaderStepDownResponse
		if err := json.Unmarshal(resp.Data, &sdr); err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
		return &sdr
	}

	// Make sure we get correct errors for tags and bad or unavailable cluster placement.
	sdr := stepdown("C22")
	if sdr.Error == nil || !strings.Contains(sdr.Error.Description, "no replacement peer connected") {
		t.Fatalf("Got incorrect error result: %+v", sdr.Error)
	}
	// Should work.
	sdr = stepdown(pcn)
	if sdr.Error != nil {
		t.Fatalf("Got an error on stepdown: %+v", sdr.Error)
	}

	sc.waitOnLeader()
	ml = sc.leader()
	cn = ml.ClusterName()

	if cn != pcn {
		t.Fatalf("Expected new metaleader to be in cluster %q, got %q", pcn, cn)
	}
}
func TestJetStreamSuperClusterUniquePlacementTag(t *testing.T) {
	tmlp := `
	listen: 127.0.0.1:-1
	server_name: %s
	jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s', unique_tag: az}
	leaf {listen: 127.0.0.1:-1}
	cluster {
		name: %s
		listen: 127.0.0.1:%d
		routes = [%s]
	}
	# For access to system account.
	accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
`
	s := createJetStreamSuperClusterWithTemplateAndModHook(t, tmlp, 5, 2,
		func(serverName, clustername, storeDir, conf string) string {
			azTag := map[string]string{
				"C1-S1": "az:same",
				"C1-S2": "az:same",
				"C1-S3": "az:same",
				"C1-S4": "az:same",
				"C1-S5": "az:same",
				"C2-S1": "az:1",
				"C2-S2": "az:2",
				"C2-S3": "az:1",
				"C2-S4": "az:2",
				"C2-S5": "az:1",
			}
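			// C1 pins every server to the same az, so unique_tag placement can only
			// ever satisfy R1 there; C2 alternates az:1/az:2, which allows R2 (but
			// not R3) without repeating an az.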
			return conf + fmt.Sprintf("\nserver_tags: [cloud:%s-tag, %s]\n", clustername, azTag[serverName])
		}, nil)
	defer s.shutdown()

	inDifferentAz := func(ci *nats.ClusterInfo) (bool, error) {
		t.Helper()
		if len(ci.Replicas) == 0 {
			return true, nil
		}
		// For R2 (has a replica; this setup does not support R3), test whether the
		// replica landed in a different az than the leader, as determined by modulo 2
		// of the server number, which aligns with the az.
		dummy := 0
		srvnum1 := 0
		srvnum2 := 0
		if n, _ := fmt.Sscanf(ci.Leader, "C%d-S%d", &dummy, &srvnum1); n != 2 {
			return false, fmt.Errorf("couldn't parse leader")
		}
		if n, _ := fmt.Sscanf(ci.Replicas[0].Name, "C%d-S%d", &dummy, &srvnum2); n != 2 {
			return false, fmt.Errorf("couldn't parse replica")
		}
		return srvnum1%2 != srvnum2%2, nil
	}

	nc := natsConnect(t, s.randomServer().ClientURL())
	defer nc.Close()

	js, err := nc.JetStream()
	require_NoError(t, err)

	for i, test := range []struct {
		placement *nats.Placement
		replicas  int
		fail      bool
		cluster   string
	}{
		// these pass because replica count is 1
		{&nats.Placement{Tags: []string{"az:same"}}, 1, false, "C1"},
		{&nats.Placement{Tags: []string{"cloud:C1-tag", "az:same"}}, 1, false, "C1"},
		{&nats.Placement{Tags: []string{"cloud:C1-tag"}}, 1, false, "C1"},
		// pass because az is set, which disables the filter
		{&nats.Placement{Tags: []string{"az:same"}}, 2, false, "C1"},
		{&nats.Placement{Tags: []string{"cloud:C1-tag", "az:same"}}, 2, false, "C1"},
		// fails because this cluster only has the same az
		{&nats.Placement{Tags: []string{"cloud:C1-tag"}}, 2, true, ""},
		// fails because no 3 unique tags exist
		{&nats.Placement{Tags: []string{"cloud:C2-tag"}}, 3, true, ""},
		{nil, 3, true, ""},
		// pass because replica count is low enough
		{nil, 2, false, "C2"},
		{&nats.Placement{Tags: []string{"cloud:C2-tag"}}, 2, false, "C2"},
		// pass because az is provided
		{&nats.Placement{Tags: []string{"az:1"}}, 3, false, "C2"},
		{&nats.Placement{Tags: []string{"az:2"}}, 2, false, "C2"},
	} {
		name := fmt.Sprintf("test-%d", i)
		t.Run(name, func(t *testing.T) {
			si, err := js.AddStream(&nats.StreamConfig{Name: name, Replicas: test.replicas, Placement: test.placement})
			if test.fail {
				require_Error(t, err)
				require_Contains(t, err.Error(), "no suitable peers for placement", "server tag not unique")
				return
			}
			require_NoError(t, err)
			if test.cluster != _EMPTY_ {
				require_Equal(t, si.Cluster.Name, test.cluster)
			}
			// skip placement test if tags call for a particular az
			if test.placement != nil && len(test.placement.Tags) > 0 {
				for _, tag := range test.placement.Tags {
					if strings.HasPrefix(tag, "az:") {
						return
					}
				}
			}
			diff, err := inDifferentAz(si.Cluster)
			require_NoError(t, err)
			require_True(t, diff)
		})
	}

	t.Run("scale-up-test", func(t *testing.T) {
		// create enough streams so we hit it eventually
		for i := 0; i < 10; i++ {
			cfg := &nats.StreamConfig{Name: fmt.Sprintf("scale-up-%d", i), Replicas: 1,
				Placement: &nats.Placement{Tags: []string{"cloud:C2-tag"}}}
			si, err := js.AddStream(cfg)
			require_NoError(t, err)
			require_Equal(t, si.Cluster.Name, "C2")
			cfg.Replicas = 2
			si, err = js.UpdateStream(cfg)
			require_NoError(t, err)
			require_Equal(t, si.Cluster.Name, "C2")
			checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
				if si, err := js.StreamInfo(cfg.Name); err != nil {
					return err
				} else if diff, err := inDifferentAz(si.Cluster); err != nil {
					return err
				} else if !diff {
					return fmt.Errorf("not in different AZ")
				}
				return nil
			})
		}
	})
}
func TestJetStreamSuperClusterBasics(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 3)
	defer sc.shutdown()

	// Client based API
	s := sc.randomServer()
	nc, js := jsClientConnect(t, s)
	defer nc.Close()

	_, err := js.AddStream(&nats.StreamConfig{Name: "TEST", Replicas: 3})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Send in 10 messages.
	msg, toSend := []byte("Hello JS Clustering"), 10
	for i := 0; i < toSend; i++ {
		if _, err = js.Publish("TEST", msg); err != nil {
			t.Fatalf("Unexpected publish error: %v", err)
		}
	}
	// Now grab info for this stream.
	si, err := js.StreamInfo("TEST")
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if si == nil || si.Config.Name != "TEST" {
		t.Fatalf("StreamInfo is not correct %+v", si)
	}
	// Check active state as well, shows that the owner answered.
	if si.State.Msgs != uint64(toSend) {
		t.Fatalf("Expected %d msgs, got bad state: %+v", toSend, si.State)
	}
	// Check request origin placement.
	if si.Cluster.Name != s.ClusterName() {
		t.Fatalf("Expected stream to be placed in %q, but got %q", s.ClusterName(), si.Cluster.Name)
	}

	// Check consumers.
	sub, err := js.SubscribeSync("TEST")
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	checkSubsPending(t, sub, toSend)
	ci, err := sub.ConsumerInfo()
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if ci.Delivered.Consumer != uint64(toSend) || ci.NumAckPending != toSend {
		t.Fatalf("ConsumerInfo is not correct: %+v", ci)
	}

	// Now check we can place a stream.
	pcn := "C3"
	scResp, err := js.AddStream(&nats.StreamConfig{
		Name:      "TEST2",
		Placement: &nats.Placement{Cluster: pcn},
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	if scResp.Cluster.Name != pcn {
		t.Fatalf("Expected the stream to be placed in %q, got %q", pcn, scResp.Cluster.Name)
	}
}
// Test that consumer interest across gateways and superclusters is properly identified in a remote cluster.
func TestJetStreamSuperClusterCrossClusterConsumerInterest(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 3)
	defer sc.shutdown()

	// Since we need all of the peers accounted for to add the stream, wait for all to be present.
	sc.waitOnPeerCount(9)

	// Client based API - Connect to Cluster C1. Stream and consumer will live in C2.
	s := sc.clusterForName("C1").randomServer()
	nc, js := jsClientConnect(t, s)
	defer nc.Close()

	pcn := "C2"
	_, err := js.AddStream(&nats.StreamConfig{Name: "foo", Replicas: 3, Placement: &nats.Placement{Cluster: pcn}})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Pull based first.
	sub, err := js.PullSubscribe("foo", "dlc")
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Send a message.
	if _, err = js.Publish("foo", []byte("CCI")); err != nil {
		t.Fatalf("Unexpected publish error: %v", err)
	}

	fetchMsgs(t, sub, 1, 5*time.Second)

	// Now check push based delivery.
	sub, err = js.SubscribeSync("foo", nats.Durable("rip"))
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	checkSubsPending(t, sub, 1)

	// Send another message.
	if _, err = js.Publish("foo", []byte("CCI")); err != nil {
		t.Fatalf("Unexpected publish error: %v", err)
	}
	checkSubsPending(t, sub, 2)
}
func TestJetStreamSuperClusterPeerReassign(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 3)
	defer sc.shutdown()

	// Client based API
	s := sc.randomServer()
	nc, js := jsClientConnect(t, s)
	defer nc.Close()

	pcn := "C2"

	// Create a stream in C2.
	_, err := js.AddStream(&nats.StreamConfig{
		Name:      "TEST",
		Placement: &nats.Placement{Cluster: pcn},
		Replicas:  3,
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Send in 10 messages.
	msg, toSend := []byte("Hello JS Clustering"), 10
	for i := 0; i < toSend; i++ {
		if _, err = js.Publish("TEST", msg); err != nil {
			t.Fatalf("Unexpected publish error: %v", err)
		}
	}
	// Now grab info for this stream.
	si, err := js.StreamInfo("TEST")
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if si == nil || si.Config.Name != "TEST" {
		t.Fatalf("StreamInfo is not correct %+v", si)
	}
	// Check active state as well, shows that the owner answered.
	if si.State.Msgs != uint64(toSend) {
		t.Fatalf("Expected %d msgs, got bad state: %+v", toSend, si.State)
	}
	// Check requested placement.
	if si.Cluster.Name != pcn {
		t.Fatalf("Expected stream to be placed in %q, but got %q", pcn, si.Cluster.Name)
	}

	// Now remove a peer that is assigned to the stream.
	rc := sc.clusterForName(pcn)
	rs := rc.randomNonStreamLeader("$G", "TEST")
	rc.removeJetStream(rs)

	// Check the stream info is eventually correct.
	checkFor(t, 2*time.Second, 50*time.Millisecond, func() error {
		si, err := js.StreamInfo("TEST")
		if err != nil {
			return fmt.Errorf("Could not fetch stream info: %v", err)
		}
		if len(si.Cluster.Replicas) != 2 {
			return fmt.Errorf("Expected 2 replicas, got %d", len(si.Cluster.Replicas))
		}
		for _, peer := range si.Cluster.Replicas {
			if !peer.Current {
				return fmt.Errorf("Expected replica to be current: %+v", peer)
			}
			if !strings.HasPrefix(peer.Name, pcn) {
				t.Fatalf("Stream peer reassigned to wrong cluster: %q", peer.Name)
			}
		}
		return nil
	})
}
func TestJetStreamSuperClusterInterestOnlyMode(t *testing.T) {
	GatewayDoNotForceInterestOnlyMode(true)
	defer GatewayDoNotForceInterestOnlyMode(false)

	template := `
	listen: 127.0.0.1:-1
	server_name: %s
	jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
	accounts {
		one {
			jetstream: enabled
			users [{user: one, password: password}]
		}
		two {
			%s
			users [{user: two, password: password}]
		}
	}
	cluster {
		listen: 127.0.0.1:%d
		name: %s
		routes = ["nats://127.0.0.1:%d"]
	}
	gateway {
		name: %s
		listen: 127.0.0.1:%d
		gateways = [{name: %s, urls: ["nats://127.0.0.1:%d"]}]
	}
`
	storeDir1 := t.TempDir()
	conf1 := createConfFile(t, []byte(fmt.Sprintf(template,
		"S1", storeDir1, "", 23222, "A", 23222, "A", 11222, "B", 11223)))
	s1, o1 := RunServerWithConfig(conf1)
	defer s1.Shutdown()

	storeDir2 := t.TempDir()
	conf2 := createConfFile(t, []byte(fmt.Sprintf(template,
		"S2", storeDir2, "", 23223, "B", 23223, "B", 11223, "A", 11222)))
	s2, o2 := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	waitForInboundGateways(t, s1, 1, 2*time.Second)
	waitForInboundGateways(t, s2, 1, 2*time.Second)
	waitForOutboundGateways(t, s1, 1, 2*time.Second)
	waitForOutboundGateways(t, s2, 1, 2*time.Second)

	nc1 := natsConnect(t, fmt.Sprintf("nats://two:password@127.0.0.1:%d", o1.Port))
	defer nc1.Close()
	nc1.Publish("foo", []byte("some message"))
	nc1.Flush()

	nc2 := natsConnect(t, fmt.Sprintf("nats://two:password@127.0.0.1:%d", o2.Port))
	defer nc2.Close()
	nc2.Publish("bar", []byte("some message"))
	nc2.Flush()

	checkMode := func(accName string, expectedMode GatewayInterestMode) {
		t.Helper()
		checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
			servers := []*Server{s1, s2}
			for _, s := range servers {
				var gws []*client
				s.getInboundGatewayConnections(&gws)
				for _, gw := range gws {
					var mode GatewayInterestMode
					gw.mu.Lock()
					ie := gw.gw.insim[accName]
					if ie != nil {
						mode = ie.mode
					}
					gw.mu.Unlock()
					if ie == nil {
						return fmt.Errorf("Account %q not in map", accName)
					}
					if mode != expectedMode {
						return fmt.Errorf("Expected account %q mode to be %v, got: %v", accName, expectedMode, mode)
					}
				}
			}
			return nil
		})
	}
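	// Account "one" has JetStream enabled, so the gateways switch it to
	// interest-only; account "two" (no JetStream yet) stays optimistic.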
checkMode("one", InterestOnly)
|
|
checkMode("two", Optimistic)
|
|
|
|
// Now change account "two" to enable JS
|
|
changeCurrentConfigContentWithNewContent(t, conf1, []byte(fmt.Sprintf(template,
|
|
"S1", storeDir1, "jetstream: enabled", 23222, "A", 23222, "A", 11222, "B", 11223)))
|
|
changeCurrentConfigContentWithNewContent(t, conf2, []byte(fmt.Sprintf(template,
|
|
"S2", storeDir2, "jetstream: enabled", 23223, "B", 23223, "B", 11223, "A", 11222)))
|
|
|
|
if err := s1.Reload(); err != nil {
|
|
t.Fatalf("Error on s1 reload: %v", err)
|
|
}
|
|
if err := s2.Reload(); err != nil {
|
|
t.Fatalf("Error on s2 reload: %v", err)
|
|
}
|
|
|
|
checkMode("one", InterestOnly)
|
|
checkMode("two", InterestOnly)
|
|
}
|
|
|
|
func TestJetStreamSuperClusterConnectionCount(t *testing.T) {
	sc := createJetStreamSuperClusterWithTemplate(t, jsClusterAccountsTempl, 3, 2)
	defer sc.shutdown()

	sysNc := natsConnect(t, sc.randomServer().ClientURL(), nats.UserInfo("admin", "s3cr3t!"))
	defer sysNc.Close()
	_, err := sysNc.Request(fmt.Sprintf(accDirectReqSubj, "ONE", "CONNS"), nil, 100*time.Millisecond)
	// this is a timeout as the server only responds when it has connections...
	// not convinced this should be that way, but also not the issue to investigate.
	require_True(t, err == nats.ErrTimeout)

	for i := 1; i <= 2; i++ {
		func() {
			nc := natsConnect(t, sc.clusterForName(fmt.Sprintf("C%d", i)).randomServer().ClientURL())
			defer nc.Close()
			js, err := nc.JetStream()
			require_NoError(t, err)
			name := fmt.Sprintf("foo%d", 1)
			_, err = js.AddStream(&nats.StreamConfig{
				Name:     name,
				Subjects: []string{name},
				Replicas: 3})
			require_NoError(t, err)
		}()
	}
	func() {
		nc := natsConnect(t, sc.clusterForName("C1").randomServer().ClientURL())
		defer nc.Close()
		js, err := nc.JetStream()
		require_NoError(t, err)
		_, err = js.AddStream(&nats.StreamConfig{
			Name:     "src",
			Sources:  []*nats.StreamSource{{Name: "foo.1"}, {Name: "foo.2"}},
			Replicas: 3})
		require_NoError(t, err)
	}()
	func() {
		nc := natsConnect(t, sc.clusterForName("C2").randomServer().ClientURL())
		defer nc.Close()
		js, err := nc.JetStream()
		require_NoError(t, err)
		_, err = js.AddStream(&nats.StreamConfig{
			Name:     "mir",
			Mirror:   &nats.StreamSource{Name: "foo.2"},
			Replicas: 3})
		require_NoError(t, err)
	}()

	// There should be no active NATS CLIENT connections, but we still need
	// to wait a little bit...
	checkFor(t, 2*time.Second, 15*time.Millisecond, func() error {
		_, err := sysNc.Request(fmt.Sprintf(accDirectReqSubj, "ONE", "CONNS"), nil, 100*time.Millisecond)
		if err != nats.ErrTimeout {
			return fmt.Errorf("Expected timeout, got %v", err)
		}
		return nil
	})
	sysNc.Close()

	s := sc.randomServer()
	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
		acc, err := s.lookupAccount("ONE")
		if err != nil {
			t.Fatalf("Could not look up account: %v", err)
		}
		if n := acc.NumConnections(); n != 0 {
			return fmt.Errorf("Expected no connections, got %d", n)
		}
		return nil
	})
}
func TestJetStreamSuperClusterConsumersBrokenGateways(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 1, 2)
	defer sc.shutdown()

	// Client based API
	s := sc.clusterForName("C1").randomServer()
	nc, js := jsClientConnect(t, s)
	defer nc.Close()

	// This will be in C1.
	_, err := js.AddStream(&nats.StreamConfig{Name: "TEST"})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Create a stream in C2 that sources TEST
	_, err = js.AddStream(&nats.StreamConfig{
		Name:      "S",
		Placement: &nats.Placement{Cluster: "C2"},
		Sources:   []*nats.StreamSource{{Name: "TEST"}},
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Wait for direct consumer to get registered and detect interest across GW.
	time.Sleep(time.Second)

	// Send 100 msgs over ~100ms in a separate Go routine.
	msg, toSend, done := []byte("Hello"), 100, make(chan bool)
	go func() {
		// Send in the 100 messages, pacing them out.
		for i := 0; i < toSend; i++ {
			if _, err = js.Publish("TEST", msg); err != nil {
				t.Errorf("Unexpected publish error: %v", err)
			}
			time.Sleep(500 * time.Microsecond)
		}
		done <- true
	}()

	breakGW := func() {
		s.gateway.Lock()
		gw := s.gateway.out["C2"]
		s.gateway.Unlock()
		if gw != nil {
			gw.closeConnection(ClientClosed)
		}
	}

	// Wait until about halfway through.
	time.Sleep(20 * time.Millisecond)
	// Now break GW connection.
	breakGW()

	// Wait for GW to reform.
	for _, c := range sc.clusters {
		for _, s := range c.servers {
			waitForOutboundGateways(t, s, 1, 2*time.Second)
		}
	}

	select {
	case <-done:
	case <-time.After(2 * time.Second):
		t.Fatalf("Did not complete sending first batch of messages")
	}

	// Make sure we can deal with data loss at the end.
	checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
		si, err := js.StreamInfo("S")
		if err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
		if si.State.Msgs != 100 {
			return fmt.Errorf("Expected to have %d messages, got %d", 100, si.State.Msgs)
		}
		return nil
	})

	// Now send 100 more. We will also break the GW in the middle.
	for i := 0; i < toSend; i++ {
		if _, err = js.Publish("TEST", msg); err != nil {
			t.Fatalf("Unexpected publish error: %v", err)
		}
		if i == 50 {
			breakGW()
		}
	}

	// Wait for GW to reform.
	for _, c := range sc.clusters {
		for _, s := range c.servers {
			waitForOutboundGateways(t, s, 1, 2*time.Second)
		}
	}

	si, err := js.StreamInfo("TEST")
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if si.State.Msgs != 200 {
		t.Fatalf("Expected to have %d messages, got %d", 200, si.State.Msgs)
	}

	checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
		si, err := js.StreamInfo("S")
		if err != nil {
			return fmt.Errorf("Unexpected error: %v", err)
		}
		if si.State.Msgs != 200 {
			return fmt.Errorf("Expected to have %d messages, got %d", 200, si.State.Msgs)
		}
		return nil
	})
}
func TestJetStreamSuperClusterLeafNodesWithSharedSystemAccountAndSameDomain(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 2)
	defer sc.shutdown()

	lnc := sc.createLeafNodes("LNC", 2)
	defer lnc.shutdown()

	// We want to make sure there is only one leader and it's always in the supercluster.
	sc.waitOnLeader()

	if ml := lnc.leader(); ml != nil {
		t.Fatalf("Detected a meta-leader in the leafnode cluster: %s", ml)
	}

	// Leafnodes should have been added into the overall peer count.
	sc.waitOnPeerCount(8)

	// Check here that we auto detect sharing the system account as well and auto place the correct
	// deny imports and exports.
	ls := lnc.randomServer()
	if ls == nil {
		t.Fatalf("Expected a leafnode server, got none")
	}
	gacc := ls.globalAccount().GetName()

	ls.mu.Lock()
	var hasDE, hasDI bool
	for _, ln := range ls.leafs {
		ln.mu.Lock()
		if ln.leaf.remote.RemoteLeafOpts.LocalAccount == gacc {
			re := ln.perms.pub.deny.Match(jsAllAPI)
			hasDE = len(re.psubs)+len(re.qsubs) > 0
			rs := ln.perms.sub.deny.Match(jsAllAPI)
			hasDI = len(rs.psubs)+len(rs.qsubs) > 0
		}
		ln.mu.Unlock()
	}
	ls.mu.Unlock()

	if !hasDE {
		t.Fatalf("No deny export on global account")
	}
	if !hasDI {
		t.Fatalf("No deny import on global account")
	}

	// Make a stream by connecting to the leafnode cluster. Make sure placement is correct.
	// Client based API
	nc, js := jsClientConnect(t, lnc.randomServer())
	defer nc.Close()

	si, err := js.AddStream(&nats.StreamConfig{
		Name:     "TEST",
		Subjects: []string{"foo", "bar"},
		Replicas: 2,
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if si.Cluster.Name != "LNC" {
		t.Fatalf("Expected default placement to be %q, got %q", "LNC", si.Cluster.Name)
	}

	// Now make sure placement also works if we want to place in a cluster in the supercluster.
	pcn := "C2"
	si, err = js.AddStream(&nats.StreamConfig{
		Name:      "TEST2",
		Subjects:  []string{"baz"},
		Replicas:  2,
		Placement: &nats.Placement{Cluster: pcn},
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if si.Cluster.Name != pcn {
		t.Fatalf("Expected default placement to be %q, got %q", pcn, si.Cluster.Name)
	}
}
func TestJetStreamSuperClusterLeafNodesWithSharedSystemAccountAndDifferentDomain(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 2)
	defer sc.shutdown()

	lnc := sc.createLeafNodesWithDomain("LNC", 2, "LEAFDOMAIN")
	defer lnc.shutdown()

	// With a different domain, the leafnode cluster elects its own meta leader.
	sc.waitOnLeader()
	lnc.waitOnLeader()

	// Even though the system account is shared, because domains differ the
	// leafnodes are not added into the supercluster's peer count.
	sc.waitOnPeerCount(6)
	lnc.waitOnPeerCount(2)

	// Check here that we auto detect sharing the system account as well and auto place the correct
	// deny imports and exports.
	ls := lnc.randomServer()
	if ls == nil {
		t.Fatalf("Expected a leafnode server, got none")
	}
	gacc := ls.globalAccount().GetName()

	ls.mu.Lock()
	var hasDE, hasDI bool
	for _, ln := range ls.leafs {
		ln.mu.Lock()
		if ln.leaf.remote.RemoteLeafOpts.LocalAccount == gacc {
			re := ln.perms.pub.deny.Match(jsAllAPI)
			hasDE = len(re.psubs)+len(re.qsubs) > 0
			rs := ln.perms.sub.deny.Match(jsAllAPI)
			hasDI = len(rs.psubs)+len(rs.qsubs) > 0
		}
		ln.mu.Unlock()
	}
	ls.mu.Unlock()

	if !hasDE {
		t.Fatalf("No deny export on global account")
	}
	if !hasDI {
		t.Fatalf("No deny import on global account")
	}

	// Make a stream by connecting to the leafnode cluster. Make sure placement is correct.
	// Client based API
	nc, js := jsClientConnect(t, lnc.randomServer())
	defer nc.Close()

	si, err := js.AddStream(&nats.StreamConfig{
		Name:     "TEST",
		Subjects: []string{"foo", "bar"},
		Replicas: 2,
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if si.Cluster.Name != "LNC" {
		t.Fatalf("Expected default placement to be %q, got %q", "LNC", si.Cluster.Name)
	}

	// Now make sure placement does not work for a cluster in a different domain.
	pcn := "C2"
	_, err = js.AddStream(&nats.StreamConfig{
		Name:      "TEST2",
		Subjects:  []string{"baz"},
		Replicas:  2,
		Placement: &nats.Placement{Cluster: pcn},
	})
	if err == nil || !strings.Contains(err.Error(), "no suitable peers for placement") {
		t.Fatalf("Expected no suitable peers for placement, got: %v", err)
	}
}
func TestJetStreamSuperClusterSingleLeafNodeWithSharedSystemAccount(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 2)
	defer sc.shutdown()

	ln := sc.createSingleLeafNode(true)
	defer ln.Shutdown()

	// We want to make sure there is only one leader and it's always in the supercluster.
	sc.waitOnLeader()

	// The leafnode should have been added into the overall peer count.
	sc.waitOnPeerCount(7)

	// Now make sure we can place a stream in the leaf node.
	// First connect to the leafnode server itself.
	nc, js := jsClientConnect(t, ln)
	defer nc.Close()

	si, err := js.AddStream(&nats.StreamConfig{
		Name:     "TEST1",
		Subjects: []string{"foo"},
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if si.Cluster.Name != "LNS" {
		t.Fatalf("Expected to be placed in leafnode with %q as cluster name, got %q", "LNS", si.Cluster.Name)
	}
	// Now check we can place on here as well but connect to the hub.
	nc, js = jsClientConnect(t, sc.randomServer())
	defer nc.Close()

	si, err = js.AddStream(&nats.StreamConfig{
		Name:      "TEST2",
		Subjects:  []string{"bar"},
		Placement: &nats.Placement{Cluster: "LNS"},
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if si.Cluster.Name != "LNS" {
		t.Fatalf("Expected to be placed in leafnode with %q as cluster name, got %q", "LNS", si.Cluster.Name)
	}
}
// Issue reported with superclusters and leafnodes where the first few get-next requests
// for pull subscribers have the wrong subject.
func TestJetStreamSuperClusterGetNextRewrite(t *testing.T) {
	sc := createJetStreamSuperClusterWithTemplate(t, jsClusterAccountsTempl, 2, 2)
	defer sc.shutdown()

	// Will connect the leafnode to cluster C1. We will then connect the "client" to cluster C2 to cross gateways.
	ln := sc.clusterForName("C1").createSingleLeafNodeNoSystemAccountAndEnablesJetStreamWithDomain("C", "nojs")
	defer ln.Shutdown()

	c2 := sc.clusterForName("C2")
	nc, js := jsClientConnectEx(t, c2.randomServer(), "C", nats.UserInfo("nojs", "p"))
	defer nc.Close()

	// Create a stream and add messages.
	if _, err := js.AddStream(&nats.StreamConfig{Name: "foo"}); err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	for i := 0; i < 10; i++ {
		if _, err := js.Publish("foo", []byte("ok")); err != nil {
			t.Fatalf("Unexpected publish error: %v", err)
		}
	}

	// Pull messages and make sure subject rewrite works.
	sub, err := js.PullSubscribe("foo", "dlc")
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	for _, m := range fetchMsgs(t, sub, 5, time.Second) {
		if m.Subject != "foo" {
			t.Fatalf("Expected %q as subject but got %q", "foo", m.Subject)
		}
	}
}
func TestJetStreamSuperClusterEphemeralCleanup(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 2)
	defer sc.shutdown()

	// Create a stream in cluster 0
	s := sc.clusters[0].randomServer()
	nc, js := jsClientConnect(t, s)
	defer nc.Close()

	for _, test := range []struct {
		name            string
		sourceInCluster int
		streamName      string
		sourceName      string
	}{
		{"local", 0, "TEST1", "S1"},
		{"remote", 1, "TEST2", "S2"},
	} {
		t.Run(test.name, func(t *testing.T) {
			if _, err := js.AddStream(&nats.StreamConfig{Name: test.streamName, Replicas: 3}); err != nil {
				t.Fatalf("Error adding %q stream: %v", test.streamName, err)
			}
			if _, err := js.Publish(test.streamName, []byte("hello")); err != nil {
				t.Fatalf("Unexpected publish error: %v", err)
			}

			// Now create a source for that stream, either in same or remote cluster.
			s2 := sc.clusters[test.sourceInCluster].randomServer()
			nc2, js2 := jsClientConnect(t, s2)
			defer nc2.Close()

			if _, err := js2.AddStream(&nats.StreamConfig{
				Name:     test.sourceName,
				Storage:  nats.FileStorage,
				Sources:  []*nats.StreamSource{{Name: test.streamName}},
				Replicas: 1,
			}); err != nil {
				t.Fatalf("Error adding source stream: %v", err)
			}

			// Check that TEST(n) has 1 consumer and that S(n) is created and has 1 message.
			checkFor(t, 2*time.Second, 100*time.Millisecond, func() error {
				si, err := js2.StreamInfo(test.sourceName)
				if err != nil {
					return fmt.Errorf("Could not get stream info: %v", err)
				}
				if si.State.Msgs != 1 {
					return fmt.Errorf("Expected 1 msg, got state: %+v", si.State)
				}
				return nil
			})

			// Get the consumer because we will want to artificially reduce
			// the delete threshold.
			leader := sc.clusters[0].streamLeader("$G", test.streamName)
			mset, err := leader.GlobalAccount().lookupStream(test.streamName)
			if err != nil {
				t.Fatalf("Expected to find a stream for %q, got %v", test.streamName, err)
			}
			cons := mset.getConsumers()[0]
			cons.mu.Lock()
			cons.dthresh = 1250 * time.Millisecond
			active := cons.active
			dtimerSet := cons.dtmr != nil
			deliver := cons.cfg.DeliverSubject
			cons.mu.Unlock()
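			// dtmr appears to be the consumer's inactivity (delete) timer that pairs
			// with dthresh above; while the consumer still has active interest it
			// should not be armed, which is what the next check asserts.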
			if !active || dtimerSet {
				t.Fatalf("Invalid values for active=%v dtimerSet=%v", active, dtimerSet)
			}
			// To add to the mix, let's create a local interest on the delivery subject
			// and stop it. This is to ensure that this does not stop timers that should
			// still be running and monitor the GW interest.
			sub := natsSubSync(t, nc, deliver)
			natsFlush(t, nc)
			natsUnsub(t, sub)
			natsFlush(t, nc)

			// Now remove the "S(n)" stream...
			if err := js2.DeleteStream(test.sourceName); err != nil {
				t.Fatalf("Error deleting stream: %v", err)
			}

			// Now check that the stream S(n) is really removed and that
			// the consumer is gone for stream TEST(n).
			checkFor(t, 5*time.Second, 25*time.Millisecond, func() error {
				// First, make sure that stream S(n) has disappeared.
				if _, err := js2.StreamInfo(test.sourceName); err == nil {
					return fmt.Errorf("Stream %q should no longer exist", test.sourceName)
				}
				if ndc := mset.numDirectConsumers(); ndc != 0 {
					return fmt.Errorf("Expected %q stream to have 0 consumers, got %v", test.streamName, ndc)
				}
				return nil
			})
		})
	}
}
func TestJetStreamSuperClusterGetNextSubRace(t *testing.T) {
	sc := createJetStreamSuperClusterWithTemplate(t, jsClusterAccountsTempl, 2, 2)
	defer sc.shutdown()

	// Will connect the leafnode to cluster C1. We will then connect the "client" to cluster C2 to cross gateways.
	ln := sc.clusterForName("C1").createSingleLeafNodeNoSystemAccountAndEnablesJetStreamWithDomain("C", "nojs")
	defer ln.Shutdown()

	// Shut down 1 of the servers from C1 (the one the LN is not connected to).
	for _, s := range sc.clusterForName("C1").servers {
		s.mu.Lock()
		if len(s.leafs) == 0 {
			s.mu.Unlock()
			s.Shutdown()
			break
		}
		s.mu.Unlock()
	}

	// Wait on meta leader in case shutdown of server above caused an election.
	sc.waitOnLeader()

	var c2Srv *Server
	// Take the server from C2 that has no inbound from C1.
	c2 := sc.clusterForName("C2")
	for _, s := range c2.servers {
		var gwsa [2]*client
		gws := gwsa[:0]
		s.getInboundGatewayConnections(&gws)
		if len(gws) == 0 {
			c2Srv = s
			break
		}
	}
	if c2Srv == nil {
		t.Fatalf("Both servers in C2 had an inbound GW connection!")
	}

	nc, js := jsClientConnectEx(t, c2Srv, "C", nats.UserInfo("nojs", "p"))
	defer nc.Close()

	_, err := js.AddStream(&nats.StreamConfig{Name: "foo"})
	require_NoError(t, err)

	_, err = js.AddConsumer("foo", &nats.ConsumerConfig{Durable: "dur", AckPolicy: nats.AckExplicitPolicy})
	require_NoError(t, err)

	for i := 0; i < 100; i++ {
		sendStreamMsg(t, nc, "foo", "ok")
	}

	// Wait for all messages to appear in the consumer
	checkFor(t, 2*time.Second, 50*time.Millisecond, func() error {
		ci, err := js.ConsumerInfo("foo", "dur")
		if err != nil {
			return err
		}
		if n := ci.NumPending; n != 100 {
			return fmt.Errorf("Expected 100 msgs, got %v", n)
		}
		return nil
	})

	req := &JSApiConsumerGetNextRequest{Batch: 1, Expires: 5 * time.Second}
	jreq, err := json.Marshal(req)
	require_NoError(t, err)
	// Create this by hand here to make sure we create the subscription
	// on the reply subject for every single request
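	// Since the client connected with JS domain "C" (see jsClientConnectEx above),
	// requests must target the domain-prefixed API, so swap "$JS.API" for "$JS.C.API".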
	nextSubj := fmt.Sprintf(JSApiRequestNextT, "foo", "dur")
	nextSubj = "$JS.C.API" + strings.TrimPrefix(nextSubj, "$JS.API")
	for i := 0; i < 100; i++ {
		inbox := nats.NewInbox()
		sub := natsSubSync(t, nc, inbox)
		natsPubReq(t, nc, nextSubj, inbox, jreq)
		msg := natsNexMsg(t, sub, time.Second)
		if len(msg.Header) != 0 && string(msg.Data) != "ok" {
			t.Fatalf("Unexpected message: header=%+v data=%s", msg.Header, msg.Data)
		}
		sub.Unsubscribe()
	}
}
func TestJetStreamSuperClusterPullConsumerAndHeaders(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 2)
	defer sc.shutdown()

	c1 := sc.clusterForName("C1")
	c2 := sc.clusterForName("C2")

	nc, js := jsClientConnect(t, c1.randomServer())
	defer nc.Close()

	if _, err := js.AddStream(&nats.StreamConfig{Name: "ORIGIN"}); err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	toSend := 50
	for i := 0; i < toSend; i++ {
		if _, err := js.Publish("ORIGIN", []byte("ok")); err != nil {
			t.Fatalf("Unexpected publish error: %v", err)
		}
	}

	nc2, js2 := jsClientConnect(t, c2.randomServer())
	defer nc2.Close()

	_, err := js2.AddStream(&nats.StreamConfig{
		Name:    "S",
		Sources: []*nats.StreamSource{{Name: "ORIGIN"}},
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	// Wait for them to be in the sourced stream.
	checkFor(t, 5*time.Second, 250*time.Millisecond, func() error {
		if si, _ := js2.StreamInfo("S"); si.State.Msgs != uint64(toSend) {
			return fmt.Errorf("Expected %d msgs for %q, got %d", toSend, "S", si.State.Msgs)
		}
		return nil
	})

	// Now create a pull consumer for the sourced stream.
	_, err = js2.AddConsumer("S", &nats.ConsumerConfig{Durable: "dlc", AckPolicy: nats.AckExplicitPolicy})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Now we will connect and request the next message from each server in the C1 cluster and check that headers remain in place.
	for _, s := range c1.servers {
		nc, err := nats.Connect(s.ClientURL())
		if err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
		defer nc.Close()
		m, err := nc.Request("$JS.API.CONSUMER.MSG.NEXT.S.dlc", nil, 2*time.Second)
		if err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
		if len(m.Header) != 1 {
			t.Fatalf("Expected 1 header element, got %+v", m.Header)
		}
	}
}
func TestJetStreamSuperClusterStatszActiveServers(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 2, 2)
	defer sc.shutdown()

	checkActive := func(expected int) {
		t.Helper()
		checkFor(t, 10*time.Second, 500*time.Millisecond, func() error {
			s := sc.randomServer()
			nc, err := nats.Connect(s.ClientURL(), nats.UserInfo("admin", "s3cr3t!"))
			if err != nil {
				t.Fatalf("Failed to create system client: %v", err)
			}
			defer nc.Close()

			resp, err := nc.Request(serverStatsPingReqSubj, nil, time.Second)
			if err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			var ssm ServerStatsMsg
			if err := json.Unmarshal(resp.Data, &ssm); err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			if ssm.Stats.ActiveServers != expected {
				return fmt.Errorf("Wanted %d, got %d", expected, ssm.Stats.ActiveServers)
			}
			return nil
		})
	}

	checkActive(4)
	c := sc.randomCluster()
	ss := c.randomServer()
	ss.Shutdown()
	checkActive(3)
	c.restartServer(ss)
	checkActive(4)
}
func TestJetStreamSuperClusterSourceAndMirrorConsumersLeaderChange(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 2)
	defer sc.shutdown()

	c1 := sc.clusterForName("C1")
	c2 := sc.clusterForName("C2")

	nc, js := jsClientConnect(t, c1.randomServer())
	defer nc.Close()

	var sources []*nats.StreamSource
	numStreams := 10

	for i := 1; i <= numStreams; i++ {
		name := fmt.Sprintf("O%d", i)
		sources = append(sources, &nats.StreamSource{Name: name})
		if _, err := js.AddStream(&nats.StreamConfig{Name: name}); err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
	}

	// Place our new stream that will source all the others in a different cluster.
	nc, js = jsClientConnect(t, c2.randomServer())
	defer nc.Close()

	_, err := js.AddStream(&nats.StreamConfig{
		Name:     "S",
		Replicas: 2,
		Sources:  sources,
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Force leader change twice.
	nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "S"), nil, time.Second)
	c2.waitOnStreamLeader("$G", "S")
	nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "S"), nil, time.Second)
	c2.waitOnStreamLeader("$G", "S")

	// Now make sure we only have a single direct consumer on our origin streams.
	// Pick one at random.
	name := fmt.Sprintf("O%d", rand.Intn(numStreams-1)+1)
	c1.waitOnStreamLeader("$G", name)
	s := c1.streamLeader("$G", name)
	a, err := s.lookupAccount("$G")
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	mset, err := a.lookupStream(name)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
		if ndc := mset.numDirectConsumers(); ndc != 1 {
			return fmt.Errorf("Stream %q wanted 1 direct consumer, got %d", name, ndc)
		}
		return nil
	})

	// Now create a mirror of the stream selected above. Will test the same scenario.
	_, err = js.AddStream(&nats.StreamConfig{
		Name:     "M",
		Replicas: 2,
		Mirror:   &nats.StreamSource{Name: name},
	})
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	// Force leader change twice.
	nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "M"), nil, time.Second)
	c2.waitOnStreamLeader("$G", "M")
	nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "M"), nil, time.Second)
	c2.waitOnStreamLeader("$G", "M")

	checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
		if ndc := mset.numDirectConsumers(); ndc != 2 {
			return fmt.Errorf("Stream %q wanted 2 direct consumers, got %d", name, ndc)
		}
		return nil
	})
}
func TestJetStreamSuperClusterPushConsumerInterest(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 2)
	defer sc.shutdown()

	for _, test := range []struct {
		name  string
		queue string
	}{
		{"non queue", _EMPTY_},
		{"queue", "queue"},
	} {
		t.Run(test.name, func(t *testing.T) {
			testInterest := func(s *Server) {
				t.Helper()
				nc, js := jsClientConnect(t, s)
				defer nc.Close()

				_, err := js.AddStream(&nats.StreamConfig{
					Name:     "TEST",
					Subjects: []string{"foo"},
					Replicas: 3,
				})
				require_NoError(t, err)

				var sub *nats.Subscription
				if test.queue != _EMPTY_ {
					sub, err = js.QueueSubscribeSync("foo", test.queue)
				} else {
					sub, err = js.SubscribeSync("foo", nats.Durable("dur"))
				}
				require_NoError(t, err)

				js.Publish("foo", []byte("msg1"))
				// Since the GW watcher is checking every 1sec, make sure we are
				// giving it enough time for the delivery to start.
				_, err = sub.NextMsg(2 * time.Second)
				require_NoError(t, err)
			}

			// Create the durable push consumer from cluster "0"
			testInterest(sc.clusters[0].servers[0])

			// Now "move" to a server in cluster "1"
			testInterest(sc.clusters[1].servers[0])
		})
	}
}
func TestJetStreamSuperClusterOverflowPlacement(t *testing.T) {
	sc := createJetStreamSuperClusterWithTemplate(t, jsClusterMaxBytesTempl, 3, 3)
	defer sc.shutdown()

	pcn := "C2"
	s := sc.clusterForName(pcn).randomServer()
	nc, js := jsClientConnect(t, s)
	defer nc.Close()

	// With this setup, we opted in for requiring MaxBytes, so this should error.
	_, err := js.AddStream(&nats.StreamConfig{
		Name:     "foo",
		Replicas: 3,
	})
	require_Error(t, err, NewJSStreamMaxBytesRequiredError())

	// R=2 on purpose to leave one server empty.
	_, err = js.AddStream(&nats.StreamConfig{
		Name:     "foo",
		Replicas: 2,
		MaxBytes: 2 * 1024 * 1024 * 1024,
	})
	require_NoError(t, err)

	// Now try to add another that will overflow the current cluster's reservation.
	// Since we asked explicitly for the same cluster this should fail.
	// Note this will not be testing the peer picker since the update has probably not made it to the meta leader.
	_, err = js.AddStream(&nats.StreamConfig{
		Name:      "bar",
		Replicas:  3,
		MaxBytes:  2 * 1024 * 1024 * 1024,
		Placement: &nats.Placement{Cluster: pcn},
	})
	require_Contains(t, err.Error(), "nats: no suitable peers for placement")
	// Now test actual overflow placement. So try again with no placement designation.
	// This will test the peer picker's logic since they are updated at this point and the meta leader
	// knows it can not place it in C2.
	si, err := js.AddStream(&nats.StreamConfig{
		Name:     "bar",
		Replicas: 3,
		MaxBytes: 2 * 1024 * 1024 * 1024,
	})
	require_NoError(t, err)

	// Make sure we did not get placed into C2.
	falt := si.Cluster.Name
	if falt == pcn {
		t.Fatalf("Expected to be placed in another cluster besides %q, but got %q", pcn, falt)
	}

	// One more time that should spill over again to our last cluster.
	si, err = js.AddStream(&nats.StreamConfig{
		Name:     "baz",
		Replicas: 3,
		MaxBytes: 2 * 1024 * 1024 * 1024,
	})
	require_NoError(t, err)

	// Make sure we did not get placed into C2 or the first alternate.
	if salt := si.Cluster.Name; salt == pcn || salt == falt {
		t.Fatalf("Expected to be placed in last cluster besides %q or %q, but got %q", pcn, falt, salt)
	}

	// Now place a stream of R1 into C2 which should have space.
	si, err = js.AddStream(&nats.StreamConfig{
		Name:     "dlc",
		MaxBytes: 2 * 1024 * 1024 * 1024,
	})
	require_NoError(t, err)

	if si.Cluster.Name != pcn {
		t.Fatalf("Expected to be placed in our origin cluster %q, but got %q", pcn, si.Cluster.Name)
	}
}
func TestJetStreamSuperClusterConcurrentOverflow(t *testing.T) {
	sc := createJetStreamSuperClusterWithTemplate(t, jsClusterMaxBytesTempl, 3, 3)
	defer sc.shutdown()

	pcn := "C2"

	startCh := make(chan bool)
	var wg sync.WaitGroup
	var swg sync.WaitGroup

	start := func(name string) {
		defer wg.Done()

		s := sc.clusterForName(pcn).randomServer()
		nc, js := jsClientConnect(t, s)
		defer nc.Close()

		swg.Done()
		<-startCh

		_, err := js.AddStream(&nats.StreamConfig{
			Name:     name,
			Replicas: 3,
			MaxBytes: 2 * 1024 * 1024 * 1024,
		})
		require_NoError(t, err)
	}
	wg.Add(2)
	swg.Add(2)
	go start("foo")
	go start("bar")
	swg.Wait()
	// Now start both at same time.
	close(startCh)
	wg.Wait()
}
func TestJetStreamSuperClusterStreamTagPlacement(t *testing.T) {
	sc := createJetStreamTaggedSuperCluster(t)
	defer sc.shutdown()

	placeOK := func(connectCluster string, tags []string, expectedCluster string) {
		t.Helper()
		nc, js := jsClientConnect(t, sc.clusterForName(connectCluster).randomServer())
		defer nc.Close()
		si, err := js.AddStream(&nats.StreamConfig{
			Name:      "TEST",
			Subjects:  []string{"foo"},
			Placement: &nats.Placement{Tags: tags},
		})
		require_NoError(t, err)
		if si.Cluster.Name != expectedCluster {
			t.Fatalf("Failed to place properly in %q, got %q", expectedCluster, si.Cluster.Name)
		}
		js.DeleteStream("TEST")
	}

	placeOK("C2", []string{"cloud:aws"}, "C1")
	placeOK("C2", []string{"country:jp"}, "C3")
	placeOK("C1", []string{"cloud:gcp", "country:uk"}, "C2")

	// Case should not matter.
	placeOK("C1", []string{"cloud:GCP", "country:UK"}, "C2")
	placeOK("C2", []string{"Cloud:AwS", "Country:uS"}, "C1")

	placeErr := func(connectCluster string, tags []string) {
		t.Helper()
		nc, js := jsClientConnect(t, sc.clusterForName(connectCluster).randomServer())
		defer nc.Close()
		_, err := js.AddStream(&nats.StreamConfig{
			Name:      "TEST",
			Subjects:  []string{"foo"},
			Placement: &nats.Placement{Tags: tags},
		})
		require_Contains(t, err.Error(), "no suitable peers for placement", "tags not matched")
		require_Contains(t, err.Error(), tags...)
	}

	placeErr("C1", []string{"cloud:GCP", "country:US"})
	placeErr("C1", []string{"country:DN"})
	placeErr("C1", []string{"cloud:DO"})
}
func TestJetStreamSuperClusterRemovedPeersAndStreamsListAndDelete(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 3)
	defer sc.shutdown()

	pcn := "C2"
	sc.waitOnLeader()
	ml := sc.leader()
	if ml.ClusterName() == pcn {
		pcn = "C1"
	}

	// Client based API
	nc, js := jsClientConnect(t, ml)
	defer nc.Close()

	_, err := js.AddStream(&nats.StreamConfig{
		Name:      "GONE",
		Replicas:  3,
		Placement: &nats.Placement{Cluster: pcn},
	})
	require_NoError(t, err)

	_, err = js.AddConsumer("GONE", &nats.ConsumerConfig{Durable: "dlc", AckPolicy: nats.AckExplicitPolicy})
	require_NoError(t, err)

	_, err = js.AddStream(&nats.StreamConfig{
		Name:      "TEST",
		Replicas:  3,
		Placement: &nats.Placement{Cluster: ml.ClusterName()},
	})
	require_NoError(t, err)

	// Put messages in..
	num := 100
	for i := 0; i < num; i++ {
		js.PublishAsync("GONE", []byte("SLS"))
		js.PublishAsync("TEST", []byte("SLS"))
	}
	select {
	case <-js.PublishAsyncComplete():
	case <-time.After(5 * time.Second):
		t.Fatalf("Did not receive completion signal")
	}

	c := sc.clusterForName(pcn)
	c.shutdown()

	// Grab Stream List..
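	// The list should come back promptly with "GONE" reported as missing,
	// rather than blocking on the downed cluster; the latency check below
	// guards exactly that.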
	start := time.Now()
	resp, err := nc.Request(JSApiStreamList, nil, 2*time.Second)
	require_NoError(t, err)
	if delta := time.Since(start); delta > 100*time.Millisecond {
		t.Fatalf("Stream list call took too long to return: %v", delta)
	}
	var list JSApiStreamListResponse
	err = json.Unmarshal(resp.Data, &list)
	require_NoError(t, err)

	if len(list.Missing) != 1 || list.Missing[0] != "GONE" {
		t.Fatalf("Wrong Missing: %+v", list)
	}

	// Check behavior of stream info as well. We want it to report that the stream is offline and not just timeout.
	_, err = js.StreamInfo("GONE")
	// FIXME(dlc) - Go client not putting nats: prefix on for stream but does for consumer.
	require_Error(t, err, NewJSStreamOfflineError(), errors.New("nats: stream is offline"))

	// Same for Consumer
	start = time.Now()
	resp, err = nc.Request("$JS.API.CONSUMER.LIST.GONE", nil, 2*time.Second)
	require_NoError(t, err)
	if delta := time.Since(start); delta > 100*time.Millisecond {
		t.Fatalf("Consumer list call took too long to return: %v", delta)
	}
	var clist JSApiConsumerListResponse
	err = json.Unmarshal(resp.Data, &clist)
	require_NoError(t, err)

	if len(clist.Missing) != 1 || clist.Missing[0] != "dlc" {
		t.Fatalf("Wrong Missing: %+v", clist)
	}

	_, err = js.ConsumerInfo("GONE", "dlc")
	require_Error(t, err, NewJSConsumerOfflineError(), errors.New("nats: consumer is offline"))

	// Make sure delete works.
	err = js.DeleteConsumer("GONE", "dlc")
	require_NoError(t, err)

	err = js.DeleteStream("GONE")
	require_NoError(t, err)

	// Test it is really gone.
	_, err = js.StreamInfo("GONE")
	require_Error(t, err, nats.ErrStreamNotFound)
}
func TestJetStreamSuperClusterConsumerDeliverNewBug(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 3)
	defer sc.shutdown()

	pcn := "C2"
	sc.waitOnLeader()
	ml := sc.leader()
	if ml.ClusterName() == pcn {
		pcn = "C1"
	}

	// Client based API
	nc, js := jsClientConnect(t, ml)
	defer nc.Close()

	_, err := js.AddStream(&nats.StreamConfig{
		Name:      "T",
		Replicas:  3,
		Placement: &nats.Placement{Cluster: pcn},
	})
	require_NoError(t, err)

	// Put messages in..
	num := 100
	for i := 0; i < num; i++ {
		js.PublishAsync("T", []byte("OK"))
	}
	select {
	case <-js.PublishAsyncComplete():
	case <-time.After(5 * time.Second):
		t.Fatalf("Did not receive completion signal")
	}

	ci, err := js.AddConsumer("T", &nats.ConsumerConfig{
		Durable:       "d",
		AckPolicy:     nats.AckExplicitPolicy,
		DeliverPolicy: nats.DeliverNewPolicy,
	})
	require_NoError(t, err)

	if ci.Delivered.Consumer != 0 || ci.Delivered.Stream != 100 {
		t.Fatalf("Incorrect consumer delivered info: %+v", ci.Delivered)
	}
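	// Restart each server in the placement cluster with a wiped store dir so the
	// consumer state has to be recovered from the remaining peers; the deliver-new
	// floor established above should survive that recovery.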
	c := sc.clusterForName(pcn)
	for _, s := range c.servers {
		sd := s.JetStreamConfig().StoreDir
		s.Shutdown()
		removeDir(t, sd)
		s = c.restartServer(s)
		c.waitOnServerHealthz(s)
		c.waitOnConsumerLeader("$G", "T", "d")
	}

	c.waitOnConsumerLeader("$G", "T", "d")
	ci, err = js.ConsumerInfo("T", "d")
	require_NoError(t, err)

	if ci.Delivered.Consumer != 0 || ci.Delivered.Stream != 100 {
		t.Fatalf("Incorrect consumer delivered info: %+v", ci.Delivered)
	}
	if ci.NumPending != 0 {
		t.Fatalf("Did not expect NumPending, got %d", ci.NumPending)
	}
}
// This will test our ability to move streams and consumers between clusters.
func TestJetStreamSuperClusterMovingStreamsAndConsumers(t *testing.T) {
	sc := createJetStreamTaggedSuperCluster(t)
	defer sc.shutdown()

	nc, js := jsClientConnect(t, sc.randomServer())
	defer nc.Close()

	for _, test := range []struct {
		name     string
		replicas int
	}{
		{"R1", 1},
		{"R3", 3},
	} {
		t.Run(test.name, func(t *testing.T) {
			replicas := test.replicas

			si, err := js.AddStream(&nats.StreamConfig{
				Name:      "MOVE",
				Replicas:  replicas,
				Placement: &nats.Placement{Tags: []string{"cloud:aws"}},
			})
			require_NoError(t, err)
			defer js.DeleteStream("MOVE")

			if si.Cluster.Name != "C1" {
				t.Fatalf("Failed to place properly in %q, got %q", "C1", si.Cluster.Name)
			}

			for i := 0; i < 1000; i++ {
				_, err := js.PublishAsync("MOVE", []byte("Moving on up"))
				require_NoError(t, err)
			}
			select {
			case <-js.PublishAsyncComplete():
			case <-time.After(5 * time.Second):
				t.Fatalf("Did not receive completion signal")
			}

			// Durable Push Consumer, so same R.
			dpushSub, err := js.SubscribeSync("MOVE", nats.Durable("dlc"))
			require_NoError(t, err)
			defer dpushSub.Unsubscribe()

			// Ephemeral Push Consumer, R1.
			epushSub, err := js.SubscribeSync("MOVE")
			require_NoError(t, err)
			defer epushSub.Unsubscribe()

			// Durable Pull Consumer, so same R.
			dpullSub, err := js.PullSubscribe("MOVE", "dlc-pull")
			require_NoError(t, err)
			defer dpullSub.Unsubscribe()

			// TODO(dlc) - Server supports ephemeral pulls but Go client does not yet.

			si, err = js.StreamInfo("MOVE")
			require_NoError(t, err)
			if si.State.Consumers != 3 {
				t.Fatalf("Expected 3 attached consumers, got %d", si.State.Consumers)
			}

			initialState := si.State

			checkSubsPending(t, dpushSub, int(initialState.Msgs))
			checkSubsPending(t, epushSub, int(initialState.Msgs))

			// Ack 100
			toAck := 100
			for i := 0; i < toAck; i++ {
				m, err := dpushSub.NextMsg(time.Second)
				require_NoError(t, err)
				m.AckSync()
				// Ephemeral
				m, err = epushSub.NextMsg(time.Second)
				require_NoError(t, err)
				m.AckSync()
			}

			// Do same with pull subscriber.
			for _, m := range fetchMsgs(t, dpullSub, toAck, 5*time.Second) {
				m.AckSync()
			}

			// First make sure we disallow move and replica changes in the same update.
			_, err = js.UpdateStream(&nats.StreamConfig{
				Name:      "MOVE",
				Placement: &nats.Placement{Tags: []string{"cloud:gcp"}},
				Replicas:  replicas + 1,
			})
			require_Error(t, err, NewJSStreamMoveAndScaleError())

			// Now move to new cluster.
			si, err = js.UpdateStream(&nats.StreamConfig{
				Name:      "MOVE",
				Replicas:  replicas,
				Placement: &nats.Placement{Tags: []string{"cloud:gcp"}},
			})
			require_NoError(t, err)

			if si.Cluster.Name != "C1" {
				t.Fatalf("Expected cluster of %q but got %q", "C1", si.Cluster.Name)
			}

			// Make sure we can not move an in-flight stream and consumers; this should error.
			_, err = js.UpdateStream(&nats.StreamConfig{
				Name:      "MOVE",
				Replicas:  replicas,
				Placement: &nats.Placement{Tags: []string{"cloud:aws"}},
			})
			require_Contains(t, err.Error(), "stream move already in progress")
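			// While the move is in flight the peer set expands to hold both the old
			// and new placements before shrinking back down, so transiently we expect
			// to observe 2*replicas peers.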
			checkFor(t, 10*time.Second, 10*time.Millisecond, func() error {
				si, err := js.StreamInfo("MOVE")
				if err != nil {
					return err
				}
				// We should see 2X peers.
				numPeers := len(si.Cluster.Replicas)
				if si.Cluster.Leader != _EMPTY_ {
					numPeers++
				}
				if numPeers != 2*replicas {
					// The move can happen very quickly now, so we might already be done.
					if si.Cluster.Name == "C2" {
						return nil
					}
					return fmt.Errorf("Expected to see %d replicas, got %d", 2*replicas, numPeers)
				}
				return nil
			})

			// Expect a new leader to emerge and replicas to drop as a leader is elected.
			// We have to check quickly or the move might complete and we will not see intermediate steps.
			sc.waitOnStreamLeader("$G", "MOVE")
			checkFor(t, 10*time.Second, 10*time.Millisecond, func() error {
				si, err := js.StreamInfo("MOVE")
				if err != nil {
					return err
				}
				if len(si.Cluster.Replicas) >= 2*replicas {
					return fmt.Errorf("Expected <%d replicas, got %d", 2*replicas, len(si.Cluster.Replicas))
				}
				return nil
			})

			// Should see the cluster designation and leader switch to C2.
			// We should also shrink back down to the original replica count.
			checkFor(t, 20*time.Second, 100*time.Millisecond, func() error {
				si, err := js.StreamInfo("MOVE")
				if err != nil {
					return err
				}
				if si.Cluster.Name != "C2" {
					return fmt.Errorf("Wrong cluster: %q", si.Cluster.Name)
				}
				if si.Cluster.Leader == _EMPTY_ {
					return fmt.Errorf("No leader yet")
				} else if !strings.HasPrefix(si.Cluster.Leader, "C2-") {
					return fmt.Errorf("Wrong leader: %q", si.Cluster.Leader)
				}
				// Now we want to see that we shrink back to original.
				if len(si.Cluster.Replicas) != replicas-1 {
					return fmt.Errorf("Expected %d replicas, got %d", replicas-1, len(si.Cluster.Replicas))
				}
				return nil
			})

			// Check moved state is same as initial state.
			si, err = js.StreamInfo("MOVE")
			require_NoError(t, err)

			if !reflect.DeepEqual(si.State, initialState) {
				t.Fatalf("States do not match after migration:\n%+v\nvs\n%+v", si.State, initialState)
			}

			// Make sure we can still send messages.
			addN := toAck
			for i := 0; i < addN; i++ {
				_, err := js.Publish("MOVE", []byte("Done Moved"))
				require_NoError(t, err)
			}

			si, err = js.StreamInfo("MOVE")
			require_NoError(t, err)

			expectedPushMsgs := initialState.Msgs + uint64(addN)
			expectedPullMsgs := uint64(addN)

			if si.State.Msgs != expectedPushMsgs {
				t.Fatalf("Expected to be able to send new messages")
			}

			// Now check consumers, make sure the state is correct and that they transferred state and reflect the new messages.
			// We Ack'd 100 and sent another 100, so should be same.
			checkConsumer := func(sub *nats.Subscription, isPull bool) {
				t.Helper()
				checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
					ci, err := sub.ConsumerInfo()
					if err != nil {
						return err
					}
					var expectedMsgs uint64
					if isPull {
						expectedMsgs = expectedPullMsgs
					} else {
						expectedMsgs = expectedPushMsgs
					}

					if ci.Delivered.Consumer != expectedMsgs || ci.Delivered.Stream != expectedMsgs {
						return fmt.Errorf("Delivered for %q is not correct: %+v", ci.Name, ci.Delivered)
					}
					if ci.AckFloor.Consumer != uint64(toAck) || ci.AckFloor.Stream != uint64(toAck) {
						return fmt.Errorf("AckFloor for %q is not correct: %+v", ci.Name, ci.AckFloor)
					}
					if isPull && ci.NumAckPending != 0 {
						return fmt.Errorf("NumAckPending for %q is not correct: %v", ci.Name, ci.NumAckPending)
					} else if !isPull && ci.NumAckPending != int(initialState.Msgs) {
						return fmt.Errorf("NumAckPending for %q is not correct: %v", ci.Name, ci.NumAckPending)
					}
					// Make sure the replicas etc are back to what is expected.
					si, err := js.StreamInfo("MOVE")
					if err != nil {
						return err
					}
					numExpected := si.Config.Replicas
					if ci.Config.Durable == _EMPTY_ {
						numExpected = 1
					}
					numPeers := len(ci.Cluster.Replicas)
					if ci.Cluster.Leader != _EMPTY_ {
						numPeers++
					}
					if numPeers != numExpected {
						return fmt.Errorf("Expected %d peers, got %d", numExpected, numPeers)
					}
					// If we are push check sub pending.
					if !isPull {
						checkSubsPending(t, sub, int(expectedPushMsgs)-toAck)
					}
					return nil
				})
			}

			checkPushConsumer := func(sub *nats.Subscription) {
				t.Helper()
				checkConsumer(sub, false)
			}
			checkPullConsumer := func(sub *nats.Subscription) {
				t.Helper()
				checkConsumer(sub, true)
			}

			checkPushConsumer(dpushSub)
			checkPushConsumer(epushSub)
			checkPullConsumer(dpullSub)

			// Cleanup
			err = js.DeleteStream("MOVE")
			require_NoError(t, err)
		})
	}
}

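// Moves the source stream of a mirror to another cluster while a background
// goroutine publishes at roughly 100Hz, then verifies the mirror converges to
// the exact same stream state. A handful of nats.ErrNoStreamResponse errors
// are tolerated while leadership transfers during the move.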
func TestJetStreamSuperClusterMovingStreamsWithMirror(t *testing.T) {
	sc := createJetStreamTaggedSuperCluster(t)
	defer sc.shutdown()

	nc, js := jsClientConnect(t, sc.randomServer())
	defer nc.Close()

	_, err := js.AddStream(&nats.StreamConfig{
		Name:      "SOURCE",
		Subjects:  []string{"foo", "bar"},
		Replicas:  3,
		Placement: &nats.Placement{Tags: []string{"cloud:aws"}},
	})
	require_NoError(t, err)

	_, err = js.AddStream(&nats.StreamConfig{
		Name:      "MIRROR",
		Replicas:  1,
		Mirror:    &nats.StreamSource{Name: "SOURCE"},
		Placement: &nats.Placement{Tags: []string{"cloud:gcp"}},
	})
	require_NoError(t, err)

	done := make(chan struct{})
	exited := make(chan struct{})
	errors := make(chan error, 1)

	numNoResp := uint64(0)

	// We will run a separate routine and send at 100hz
	go func() {
		nc, js := jsClientConnect(t, sc.randomServer())
		defer nc.Close()

		defer close(exited)

		for {
			select {
			case <-done:
				return
			case <-time.After(10 * time.Millisecond):
				_, err := js.Publish("foo", []byte("100HZ"))
				if err == nats.ErrNoStreamResponse {
					atomic.AddUint64(&numNoResp, 1)
					continue
				}
				if err != nil {
					errors <- err
					return
				}
			}
		}
	}()

	// Let it get going.
	time.Sleep(500 * time.Millisecond)

	// Now move the source to a new cluster.
	_, err = js.UpdateStream(&nats.StreamConfig{
		Name:      "SOURCE",
		Subjects:  []string{"foo", "bar"},
		Replicas:  3,
		Placement: &nats.Placement{Tags: []string{"cloud:gcp"}},
	})
	require_NoError(t, err)

	checkFor(t, 30*time.Second, 100*time.Millisecond, func() error {
		si, err := js.StreamInfo("SOURCE")
		if err != nil {
			return err
		}
		if si.Cluster.Name != "C2" {
			return fmt.Errorf("Wrong cluster: %q", si.Cluster.Name)
		}
		if si.Cluster.Leader == _EMPTY_ {
			return fmt.Errorf("No leader yet")
		} else if !strings.HasPrefix(si.Cluster.Leader, "C2-") {
			return fmt.Errorf("Wrong leader: %q", si.Cluster.Leader)
		}
		// Now we want to see that we shrink back to original.
		if len(si.Cluster.Replicas) != 2 {
			return fmt.Errorf("Expected %d replicas, got %d", 2, len(si.Cluster.Replicas))
		}
		// Let's get to 50+ msgs.
		if si.State.Msgs < 50 {
			return fmt.Errorf("Only see %d msgs", si.State.Msgs)
		}
		return nil
	})

	close(done)
	<-exited

	if nnr := atomic.LoadUint64(&numNoResp); nnr > 0 {
		if nnr > 5 {
			t.Fatalf("Expected no or very few failed message publishes, got %d", nnr)
		} else {
			t.Logf("Got a few failed publishes: %d", nnr)
		}
	}

	checkFor(t, 20*time.Second, 100*time.Millisecond, func() error {
		si, err := js.StreamInfo("SOURCE")
		require_NoError(t, err)
		mi, err := js.StreamInfo("MIRROR")
		require_NoError(t, err)

		if !reflect.DeepEqual(si.State, mi.State) {
			return fmt.Errorf("Expected mirror to be the same, got %+v vs %+v", mi.State, si.State)
		}
		return nil
	})
}

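// Moves a stream from C1 to C2 and back again, for both R1 and R3, checking
// after each move that the cluster designation, leader prefix, replica count,
// and message count all settle to the expected values.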
func TestJetStreamSuperClusterMovingStreamAndMoveBack(t *testing.T) {
	sc := createJetStreamTaggedSuperCluster(t)
	defer sc.shutdown()

	nc, js := jsClientConnect(t, sc.randomServer())
	defer nc.Close()

	for _, test := range []struct {
		name     string
		replicas int
	}{
		{"R1", 1},
		{"R3", 3},
	} {
		t.Run(test.name, func(t *testing.T) {
			js.DeleteStream("TEST")

			_, err := js.AddStream(&nats.StreamConfig{
				Name:      "TEST",
				Replicas:  test.replicas,
				Placement: &nats.Placement{Tags: []string{"cloud:aws"}},
			})
			require_NoError(t, err)

			toSend := 10_000
			for i := 0; i < toSend; i++ {
				_, err := js.PublishAsync("TEST", []byte("HELLO WORLD"))
				require_NoError(t, err)
			}
			select {
			case <-js.PublishAsyncComplete():
			case <-time.After(5 * time.Second):
				t.Fatalf("Did not receive completion signal")
			}

			_, err = js.UpdateStream(&nats.StreamConfig{
				Name:      "TEST",
				Replicas:  test.replicas,
				Placement: &nats.Placement{Tags: []string{"cloud:gcp"}},
			})
			require_NoError(t, err)

			checkMove := func(cluster string) {
				t.Helper()
				sc.waitOnStreamLeader("$G", "TEST")
				checkFor(t, 20*time.Second, 100*time.Millisecond, func() error {
					si, err := js.StreamInfo("TEST")
					if err != nil {
						return err
					}
					if si.Cluster.Name != cluster {
						return fmt.Errorf("Wrong cluster: %q", si.Cluster.Name)
					}
					if si.Cluster.Leader == _EMPTY_ {
						return fmt.Errorf("No leader yet")
					} else if !strings.HasPrefix(si.Cluster.Leader, cluster) {
						return fmt.Errorf("Wrong leader: %q", si.Cluster.Leader)
					}
					// Now we want to see that we shrink back to original.
					if len(si.Cluster.Replicas) != test.replicas-1 {
						return fmt.Errorf("Expected %d replicas, got %d", test.replicas-1, len(si.Cluster.Replicas))
					}
					if si.State.Msgs != uint64(toSend) {
						return fmt.Errorf("Only see %d msgs", si.State.Msgs)
					}
					return nil
				})
			}

			checkMove("C2")

			_, err = js.UpdateStream(&nats.StreamConfig{
				Name:      "TEST",
				Replicas:  test.replicas,
				Placement: &nats.Placement{Tags: []string{"cloud:aws"}},
			})
			require_NoError(t, err)

			checkMove("C1")
		})
	}
}

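// Verifies that messages delivered by a push consumer keep the stream's
// original subject (here "foo") when the delivery subject is exported from
// the JS account and imported by another account, whether the message travels
// over a route, a gateway, or a leaf node connection.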
func TestJetStreamSuperClusterImportConsumerStreamSubjectRemap(t *testing.T) {
	template := `
	listen: 127.0.0.1:-1
	server_name: %s
	jetstream: {max_mem_store: 256MB, max_file_store: 2GB, domain: HUB, store_dir: '%s'}

	cluster {
		name: %s
		listen: 127.0.0.1:%d
		routes = [%s]
	}

	accounts: {
		JS: {
			jetstream: enabled
			users: [ {user: js, password: pwd} ]
			exports [
				# This is streaming to a delivery subject for a push based consumer.
				{ stream: "deliver.ORDERS.*" }
				# This is to ack received messages. This is a service to support sync ack.
				{ service: "$JS.ACK.ORDERS.*.>" }
				# To support ordered consumers, flow control.
				{ service: "$JS.FC.>" }
			]
		},
		IM: {
			users: [ {user: im, password: pwd} ]
			imports [
				{ stream: { account: JS, subject: "deliver.ORDERS.route" }}
				{ stream: { account: JS, subject: "deliver.ORDERS.gateway" }}
				{ stream: { account: JS, subject: "deliver.ORDERS.leaf1" }}
				{ stream: { account: JS, subject: "deliver.ORDERS.leaf2" }}
				{ service: {account: JS, subject: "$JS.FC.>" }}
			]
		},
		$SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] },
	}
	leaf {
		listen: 127.0.0.1:-1
	}`

	test := func(t *testing.T, queue bool) {
		c := createJetStreamSuperClusterWithTemplate(t, template, 3, 2)
		defer c.shutdown()

		s := c.randomServer()
		nc, js := jsClientConnect(t, s, nats.UserInfo("js", "pwd"))
		defer nc.Close()

		_, err := js.AddStream(&nats.StreamConfig{
			Name:      "ORDERS",
			Subjects:  []string{"foo"}, // The JS subject.
			Replicas:  3,
			Placement: &nats.Placement{Cluster: "C1"},
		})
		require_NoError(t, err)

		_, err = js.Publish("foo", []byte("OK"))
		require_NoError(t, err)

		for dur, deliver := range map[string]string{
			"dur-route":   "deliver.ORDERS.route",
			"dur-gateway": "deliver.ORDERS.gateway",
			"dur-leaf-1":  "deliver.ORDERS.leaf1",
			"dur-leaf-2":  "deliver.ORDERS.leaf2",
		} {
			cfg := &nats.ConsumerConfig{
				Durable:        dur,
				DeliverSubject: deliver,
				AckPolicy:      nats.AckExplicitPolicy,
			}
			if queue {
				cfg.DeliverGroup = "queue"
			}
			_, err = js.AddConsumer("ORDERS", cfg)
			require_NoError(t, err)
		}

		testCase := func(t *testing.T, s *Server, dSubj string) {
			nc2, err := nats.Connect(s.ClientURL(), nats.UserInfo("im", "pwd"))
			require_NoError(t, err)
			defer nc2.Close()

			var sub *nats.Subscription
			if queue {
				sub, err = nc2.QueueSubscribeSync(dSubj, "queue")
			} else {
				sub, err = nc2.SubscribeSync(dSubj)
			}
			require_NoError(t, err)

			m, err := sub.NextMsg(time.Second)
			require_NoError(t, err)

			if m.Subject != "foo" {
				t.Fatalf("Subject not mapped correctly across account boundary, expected %q got %q", "foo", m.Subject)
			}
			require_False(t, strings.Contains(m.Reply, "@"))
		}

		t.Run("route", func(t *testing.T) {
			// pick random non consumer leader so we receive via route
			s := c.clusterForName("C1").randomNonConsumerLeader("JS", "ORDERS", "dur-route")
			testCase(t, s, "deliver.ORDERS.route")
		})
		t.Run("gateway", func(t *testing.T) {
			// pick server with inbound gateway from consumer leader, so we receive from gateway and have no route in between
			scl := c.clusterForName("C1").consumerLeader("JS", "ORDERS", "dur-gateway")
			var sfound *Server
			for _, s := range c.clusterForName("C2").servers {
				s.mu.Lock()
				for _, c := range s.gateway.in {
					if c.GetName() == scl.info.ID {
						sfound = s
						break
					}
				}
				s.mu.Unlock()
				if sfound != nil {
					break
				}
			}
			testCase(t, sfound, "deliver.ORDERS.gateway")
		})
		t.Run("leaf-post-export", func(t *testing.T) {
			// create leaf node server connected post export/import
			scl := c.clusterForName("C1").consumerLeader("JS", "ORDERS", "dur-leaf-1")
			cf := createConfFile(t, []byte(fmt.Sprintf(`
			port: -1
			leafnodes {
				remotes [ { url: "nats://im:pwd@127.0.0.1:%d" } ]
			}
			authorization: {
				user: im,
				password: pwd
			}
			`, scl.getOpts().LeafNode.Port)))
			s, _ := RunServerWithConfig(cf)
			defer s.Shutdown()
			checkLeafNodeConnected(t, scl)
			testCase(t, s, "deliver.ORDERS.leaf1")
		})
		t.Run("leaf-pre-export", func(t *testing.T) {
			// create leaf node server connected pre export, perform export/import on leaf node server
			scl := c.clusterForName("C1").consumerLeader("JS", "ORDERS", "dur-leaf-2")
			cf := createConfFile(t, []byte(fmt.Sprintf(`
			port: -1
			leafnodes {
				remotes [ { url: "nats://js:pwd@127.0.0.1:%d", account: JS2 } ]
			}
			accounts: {
				JS2: {
					users: [ {user: js, password: pwd} ]
					exports [
						# This is streaming to a delivery subject for a push based consumer.
						{ stream: "deliver.ORDERS.leaf2" }
						# This is to ack received messages. This is a service to support sync ack.
						{ service: "$JS.ACK.ORDERS.*.>" }
						# To support ordered consumers, flow control.
						{ service: "$JS.FC.>" }
					]
				},
				IM2: {
					users: [ {user: im, password: pwd} ]
					imports [
						{ stream: { account: JS2, subject: "deliver.ORDERS.leaf2" }}
						{ service: {account: JS2, subject: "$JS.FC.>" }}
					]
				},
			}
			`, scl.getOpts().LeafNode.Port)))
			s, _ := RunServerWithConfig(cf)
			defer s.Shutdown()
			checkLeafNodeConnected(t, scl)
			testCase(t, s, "deliver.ORDERS.leaf2")
		})
	}

	t.Run("noQueue", func(t *testing.T) {
		test(t, false)
	})
	t.Run("queue", func(t *testing.T) {
		test(t, true)
	})
}

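// Exercises the per-server limits.max_ha_assets setting (2 per server here).
// Note that HAAssets as reported in statsz includes the meta group, which is
// why every server starts out reporting one asset; based on the assertions
// below, only replicated (R>1) streams and durable consumers appear to count
// against the configured limit, while R1 assets do not.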
func TestJetStreamSuperClusterMaxHaAssets(t *testing.T) {
	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, `
	listen: 127.0.0.1:-1
	server_name: %s
	jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s', limits: {max_ha_assets: 2}}
	cluster {
		name: %s
		listen: 127.0.0.1:%d
		routes = [%s]
	}
	accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
	`, 3, 2,
		func(serverName, clusterName, storeDir, conf string) string {
			return conf
		}, nil)
	defer sc.shutdown()

	// speed up statsz reporting
	for _, c := range sc.clusters {
		for _, s := range c.servers {
			s.mu.Lock()
			s.sys.statsz = 10 * time.Millisecond
			s.sys.cstatsz = s.sys.statsz
			s.sys.stmr.Reset(s.sys.statsz)
			s.mu.Unlock()
		}
	}

	nc, js := jsClientConnect(t, sc.randomServer())
	defer nc.Close()

	ncSys := natsConnect(t, sc.randomServer().ClientURL(), nats.UserInfo("admin", "s3cr3t!"))
	defer ncSys.Close()
	statszSub, err := ncSys.SubscribeSync(fmt.Sprintf(serverStatsSubj, "*"))
	require_NoError(t, err)
	require_NoError(t, ncSys.Flush())

	waitStatsz := func(peers, haassets int) {
		t.Helper()
		for peersWithExactHaAssets := 0; peersWithExactHaAssets < peers; {
			m, err := statszSub.NextMsg(time.Second)
			require_NoError(t, err)
			var statsz ServerStatsMsg
			err = json.Unmarshal(m.Data, &statsz)
			require_NoError(t, err)
			if statsz.Stats.JetStream == nil {
				continue
			}
			if haassets == statsz.Stats.JetStream.Stats.HAAssets {
				peersWithExactHaAssets++
			}
		}
	}
	waitStatsz(6, 1) // counts _meta_
	_, err = js.AddStream(&nats.StreamConfig{Name: "S0", Replicas: 1, Placement: &nats.Placement{Cluster: "C1"}})
	require_NoError(t, err)
	waitStatsz(6, 1)
	_, err = js.AddStream(&nats.StreamConfig{Name: "S1", Replicas: 3, Placement: &nats.Placement{Cluster: "C1"}})
	require_NoError(t, err)
	waitStatsz(3, 2)
	waitStatsz(3, 1)
	_, err = js.AddStream(&nats.StreamConfig{Name: "S2", Replicas: 3, Placement: &nats.Placement{Cluster: "C1"}})
	require_NoError(t, err)
	waitStatsz(3, 3)
	waitStatsz(3, 1)
	_, err = js.AddStream(&nats.StreamConfig{Name: "S3", Replicas: 3, Placement: &nats.Placement{Cluster: "C1"}})
	require_Error(t, err)
	require_Contains(t, err.Error(), "nats: no suitable peers for placement")
	require_Contains(t, err.Error(), "miscellaneous issue")
	require_NoError(t, js.DeleteStream("S1"))
	waitStatsz(3, 2)
	waitStatsz(3, 1)
	_, err = js.AddConsumer("S2", &nats.ConsumerConfig{Durable: "DUR1", AckPolicy: nats.AckExplicitPolicy})
	require_NoError(t, err)
	waitStatsz(3, 3)
	waitStatsz(3, 1)
	_, err = js.AddConsumer("S2", &nats.ConsumerConfig{Durable: "DUR2", AckPolicy: nats.AckExplicitPolicy})
	require_Error(t, err)
	require_Equal(t, err.Error(), "nats: insufficient resources")
	_, err = js.AddConsumer("S2", &nats.ConsumerConfig{AckPolicy: nats.AckExplicitPolicy})
	require_NoError(t, err)
	waitStatsz(3, 3)
	waitStatsz(3, 1)
	_, err = js.UpdateStream(&nats.StreamConfig{Name: "S2", Replicas: 3, Description: "foobar"})
	require_NoError(t, err)
	waitStatsz(3, 3)
	waitStatsz(3, 1)
	si, err := js.AddStream(&nats.StreamConfig{Name: "S4", Replicas: 3})
	require_NoError(t, err)
	require_Equal(t, si.Cluster.Name, "C2")
	waitStatsz(3, 3)
	waitStatsz(3, 2)
	si, err = js.AddStream(&nats.StreamConfig{Name: "S5", Replicas: 3})
	require_NoError(t, err)
	require_Equal(t, si.Cluster.Name, "C2")
	waitStatsz(6, 3)
	_, err = js.AddConsumer("S4", &nats.ConsumerConfig{Durable: "DUR2", AckPolicy: nats.AckExplicitPolicy})
	require_Error(t, err)
	require_Equal(t, err.Error(), "nats: insufficient resources")
	_, err = js.UpdateStream(&nats.StreamConfig{Name: "S2", Replicas: 3, Placement: &nats.Placement{Cluster: "C2"}})
	require_Error(t, err)
	require_Contains(t, err.Error(), "nats: no suitable peers for placement")
	require_Contains(t, err.Error(), "miscellaneous issue")
}

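// Stream alternates in a STREAM.INFO response list the stream and its mirrors
// that can serve the same data; the first entry is expected to be the one in
// the requesting client's own cluster.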
func TestJetStreamSuperClusterStreamAlternates(t *testing.T) {
	sc := createJetStreamTaggedSuperCluster(t)
	defer sc.shutdown()

	nc, js := jsClientConnect(t, sc.randomServer())
	defer nc.Close()

	// C1
	_, err := js.AddStream(&nats.StreamConfig{
		Name:      "SOURCE",
		Subjects:  []string{"foo", "bar", "baz"},
		Replicas:  3,
		Placement: &nats.Placement{Tags: []string{"cloud:aws", "country:us"}},
	})
	require_NoError(t, err)

	// C2
	_, err = js.AddStream(&nats.StreamConfig{
		Name:      "MIRROR-1",
		Replicas:  1,
		Mirror:    &nats.StreamSource{Name: "SOURCE"},
		Placement: &nats.Placement{Tags: []string{"cloud:gcp", "country:uk"}},
	})
	require_NoError(t, err)

	// C3
	_, err = js.AddStream(&nats.StreamConfig{
		Name:      "MIRROR-2",
		Replicas:  2,
		Mirror:    &nats.StreamSource{Name: "SOURCE"},
		Placement: &nats.Placement{Tags: []string{"cloud:az", "country:jp"}},
	})
	require_NoError(t, err)

	// No client support yet, so do by hand.
	getStreamInfo := func(nc *nats.Conn, expected string) {
		t.Helper()
		resp, err := nc.Request(fmt.Sprintf(JSApiStreamInfoT, "SOURCE"), nil, time.Second)
		require_NoError(t, err)
		var si StreamInfo
		err = json.Unmarshal(resp.Data, &si)
		require_NoError(t, err)
		require_True(t, len(si.Alternates) == 3)
		require_True(t, si.Alternates[0].Cluster == expected)
		seen := make(map[string]struct{})
		for _, alt := range si.Alternates {
			seen[alt.Cluster] = struct{}{}
		}
		require_True(t, len(seen) == 3)
	}

	// Connect to different clusters to check ordering.
	nc, _ = jsClientConnect(t, sc.clusterForName("C1").randomServer())
	defer nc.Close()
	getStreamInfo(nc, "C1")
	nc, _ = jsClientConnect(t, sc.clusterForName("C2").randomServer())
	defer nc.Close()
	getStreamInfo(nc, "C2")
	nc, _ = jsClientConnect(t, sc.clusterForName("C3").randomServer())
	defer nc.Close()
	getStreamInfo(nc, "C3")
}

// We had a scenario where a consumer would not recover properly on restart due to
// the cluster state not being set properly when checking source subjects.
func TestJetStreamSuperClusterStateOnRestartPreventsConsumerRecovery(t *testing.T) {
	sc := createJetStreamTaggedSuperCluster(t)
	defer sc.shutdown()

	nc, js := jsClientConnect(t, sc.randomServer())
	defer nc.Close()

	// C1
	_, err := js.AddStream(&nats.StreamConfig{
		Name:      "SOURCE",
		Subjects:  []string{"foo", "bar"},
		Replicas:  3,
		Placement: &nats.Placement{Tags: []string{"cloud:aws", "country:us"}},
	})
	require_NoError(t, err)

	// C2
	_, err = js.AddStream(&nats.StreamConfig{
		Name:      "DS",
		Subjects:  []string{"baz"},
		Replicas:  3,
		Sources:   []*nats.StreamSource{{Name: "SOURCE"}},
		Placement: &nats.Placement{Tags: []string{"cloud:gcp", "country:uk"}},
	})
	require_NoError(t, err)

	// Bind to DS and match filter subject of SOURCE.
	_, err = js.AddConsumer("DS", &nats.ConsumerConfig{
		Durable:        "dlc",
		AckPolicy:      nats.AckExplicitPolicy,
		FilterSubject:  "foo",
		DeliverSubject: "d",
	})
	require_NoError(t, err)

	// Send a few messages.
	for i := 0; i < 100; i++ {
		_, err := js.Publish("foo", []byte("HELLO"))
		require_NoError(t, err)
	}
	sub := natsSubSync(t, nc, "d")
	natsNexMsg(t, sub, time.Second)

	c := sc.clusterForName("C2")
	cl := c.consumerLeader("$G", "DS", "dlc")

	// Pull source out from underneath the downstream stream.
	err = js.DeleteStream("SOURCE")
	require_NoError(t, err)

	cl.Shutdown()
	cl = c.restartServer(cl)
	c.waitOnServerHealthz(cl)

	// Now make sure the consumer is still on this server and has restarted properly.
	mset, err := cl.GlobalAccount().lookupStream("DS")
	require_NoError(t, err)
	if o := mset.lookupConsumer("dlc"); o == nil {
		t.Fatalf("Consumer was not properly restarted")
	}
}

// We allow mirrors to opt-in to direct get in a distributed queue group.
func TestJetStreamSuperClusterStreamDirectGetMirrorQueueGroup(t *testing.T) {
	sc := createJetStreamTaggedSuperCluster(t)
	defer sc.shutdown()

	nc, js := jsClientConnect(t, sc.randomServer())
	defer nc.Close()

	// C1
	// Do by hand for now.
	cfg := &StreamConfig{
		Name:        "SOURCE",
		Subjects:    []string{"kv.>"},
		MaxMsgsPer:  1,
		Placement:   &Placement{Tags: []string{"cloud:aws", "country:us"}},
		AllowDirect: true,
		Replicas:    3,
		Storage:     MemoryStorage,
	}
	addStream(t, nc, cfg)

	num := 100
	for i := 0; i < num; i++ {
		js.PublishAsync(fmt.Sprintf("kv.%d", i), []byte("VAL"))
	}
	select {
	case <-js.PublishAsyncComplete():
	case <-time.After(5 * time.Second):
		t.Fatalf("Did not receive completion signal")
	}

	// C2
	cfg = &StreamConfig{
		Name:         "M1",
		Mirror:       &StreamSource{Name: "SOURCE"},
		Placement:    &Placement{Tags: []string{"cloud:gcp", "country:uk"}},
		MirrorDirect: true,
		Storage:      MemoryStorage,
	}
	addStream(t, nc, cfg)

	// C3 (clustered)
	cfg = &StreamConfig{
		Name:         "M2",
		Mirror:       &StreamSource{Name: "SOURCE"},
		Replicas:     3,
		Placement:    &Placement{Tags: []string{"country:jp"}},
		MirrorDirect: true,
		Storage:      MemoryStorage,
	}
	addStream(t, nc, cfg)

	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
		si, err := js.StreamInfo("M2")
		require_NoError(t, err)
		if si.State.Msgs != uint64(num) {
			return fmt.Errorf("Expected %d msgs, got state: %d", num, si.State.Msgs)
		}
		return nil
	})

	// Since last one was an R3, check and wait for the direct subscription.
	checkFor(t, 2*time.Second, 100*time.Millisecond, func() error {
		sl := sc.clusterForName("C3").streamLeader("$G", "M2")
		if mset, err := sl.GlobalAccount().lookupStream("M2"); err == nil {
			mset.mu.RLock()
			ok := mset.mirror.dsub != nil
			mset.mu.RUnlock()
			if ok {
				return nil
			}
		}
		return fmt.Errorf("No dsub yet")
	})

	// Always do a direct get to the source, but check that we are getting
	// answers from the mirrors when connected to their cluster.
	getSubj := fmt.Sprintf(JSDirectMsgGetT, "SOURCE")
	req := []byte(`{"last_by_subj":"kv.22"}`)
	getMsg := func(c *nats.Conn) *nats.Msg {
		m, err := c.Request(getSubj, req, time.Second)
		require_NoError(t, err)
		require_True(t, string(m.Data) == "VAL")
		require_True(t, m.Header.Get(JSSequence) == "23")
		require_True(t, m.Header.Get(JSSubject) == "kv.22")
		return m
	}

	// C1 -> SOURCE
	nc, _ = jsClientConnect(t, sc.clusterForName("C1").randomServer())
	defer nc.Close()

	m := getMsg(nc)
	require_True(t, m.Header.Get(JSStream) == "SOURCE")

	// C2 -> M1
	nc, _ = jsClientConnect(t, sc.clusterForName("C2").randomServer())
	defer nc.Close()

	m = getMsg(nc)
	require_True(t, m.Header.Get(JSStream) == "M1")

	// C3 -> M2
	nc, _ = jsClientConnect(t, sc.clusterForName("C3").randomServer())
	defer nc.Close()

	m = getMsg(nc)
	require_True(t, m.Header.Get(JSStream) == "M2")
}

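// A move can be triggered implicitly by changing a stream's placement tags.
// This test cancels such a move through the server stream-move cancel API and
// expects the stream's placement to be cleared afterwards.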
func TestJetStreamSuperClusterTagInducedMoveCancel(t *testing.T) {
	server := map[string]struct{}{}
	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, jsClusterTempl, 4, 2,
		func(serverName, clusterName, storeDir, conf string) string {
			server[serverName] = struct{}{}
			return fmt.Sprintf("%s\nserver_tags: [%s]", conf, clusterName)
		}, nil)
	defer sc.shutdown()

	// Client based API
	c := sc.randomCluster()
	srv := c.randomNonLeader()
	nc, js := jsClientConnect(t, srv)
	defer nc.Close()

	cfg := &nats.StreamConfig{
		Name:      "TEST",
		Subjects:  []string{"foo"},
		Placement: &nats.Placement{Tags: []string{"C1"}},
		Replicas:  3,
	}
	siCreate, err := js.AddStream(cfg)
	require_NoError(t, err)
	require_Equal(t, siCreate.Cluster.Name, "C1")

	toSend := uint64(1_000)
	for i := uint64(0); i < toSend; i++ {
		_, err = js.Publish("foo", nil)
		require_NoError(t, err)
	}

	ncsys, err := nats.Connect(srv.ClientURL(), nats.UserInfo("admin", "s3cr3t!"))
	require_NoError(t, err)
	defer ncsys.Close()

	// cause a move by altering placement tags
	cfg.Placement.Tags = []string{"C2"}
	_, err = js.UpdateStream(cfg)
	require_NoError(t, err)

	rmsg, err := ncsys.Request(fmt.Sprintf(JSApiServerStreamCancelMoveT, "$G", "TEST"), nil, 5*time.Second)
	require_NoError(t, err)
	var cancelResp JSApiStreamUpdateResponse
	require_NoError(t, json.Unmarshal(rmsg.Data, &cancelResp))
	if cancelResp.Error != nil && ErrorIdentifier(cancelResp.Error.ErrCode) == JSStreamMoveNotInProgress {
		t.Skip("This can happen with delays, when Move completed before Cancel", cancelResp.Error)
		return
	}
	require_True(t, cancelResp.Error == nil)

	checkFor(t, 10*time.Second, 250*time.Millisecond, func() error {
		si, err := js.StreamInfo("TEST")
		require_NoError(t, err)
		if si.Config.Placement != nil {
			return fmt.Errorf("expected placement to be cleared got: %+v", si.Config.Placement)
		}
		return nil
	})
}

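// Moves are requested via the system account API. A request names the peer to
// move away from and the tags identifying the destination, roughly (a sketch;
// the server names here are hypothetical, the test below derives real ones
// from the created stream's peer set):
//
//	moveReq, _ := json.Marshal(&JSApiMetaServerStreamMoveRequest{
//		Server: "C1-S1",           // peer to vacate
//		Tags:   []string{"C1-S4"}, // tags selecting the destination
//	})
//	ncsys.Request(fmt.Sprintf(JSApiServerStreamMoveT, "$G", "TEST"), moveReq, 5*time.Second)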
func TestJetStreamSuperClusterMoveCancel(t *testing.T) {
	usageTickOld := usageTick
	usageTick = 250 * time.Millisecond
	defer func() {
		usageTick = usageTickOld
	}()

	server := map[string]struct{}{}
	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, jsClusterTempl, 4, 2,
		func(serverName, clusterName, storeDir, conf string) string {
			server[serverName] = struct{}{}
			return fmt.Sprintf("%s\nserver_tags: [%s]", conf, serverName)
		}, nil)
	defer sc.shutdown()

	// Client based API
	c := sc.randomCluster()
	srv := c.randomNonLeader()
	nc, js := jsClientConnect(t, srv)
	defer nc.Close()

	siCreate, err := js.AddStream(&nats.StreamConfig{
		Name:     "TEST",
		Subjects: []string{"foo"},
		Replicas: 3,
	})
	require_NoError(t, err)
	streamPeerSrv := []string{siCreate.Cluster.Leader, siCreate.Cluster.Replicas[0].Name, siCreate.Cluster.Replicas[1].Name}
	// determine empty server
	for _, s := range streamPeerSrv {
		delete(server, s)
	}
	// pick the left over server in the same cluster as the other servers
	emptySrv := _EMPTY_
	for s := range server {
		// server name is prefixed with cluster name
		if strings.HasPrefix(s, c.name) {
			emptySrv = s
			break
		}
	}

	expectedPeers := map[string]struct{}{
		getHash(streamPeerSrv[0]): {},
		getHash(streamPeerSrv[1]): {},
		getHash(streamPeerSrv[2]): {},
	}

	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "DUR", AckPolicy: nats.AckExplicitPolicy})
	require_NoError(t, err)
	ci, err := js.AddConsumer("TEST", &nats.ConsumerConfig{InactiveThreshold: time.Hour, AckPolicy: nats.AckExplicitPolicy})
	require_NoError(t, err)
	ephName := ci.Name

	toSend := uint64(1_000)
	for i := uint64(0); i < toSend; i++ {
		_, err = js.Publish("foo", nil)
		require_NoError(t, err)
	}

	serverEmpty := func(fromSrv string) error {
		if jszAfter, err := c.serverByName(fromSrv).Jsz(nil); err != nil {
			return fmt.Errorf("could not fetch JS info for server: %v", err)
		} else if jszAfter.Streams != 0 {
			return fmt.Errorf("empty server still has %d streams", jszAfter.Streams)
		} else if jszAfter.Consumers != 0 {
			return fmt.Errorf("empty server still has %d consumers", jszAfter.Consumers)
		} else if jszAfter.Bytes != 0 {
			return fmt.Errorf("empty server still has %d bytes of storage", jszAfter.Bytes)
		}
		return nil
	}

	checkSrvInvariant := func(s *Server, expectedPeers map[string]struct{}) error {
		js, cc := s.getJetStreamCluster()
		js.mu.Lock()
		defer js.mu.Unlock()
		if sa, ok := cc.streams["$G"]["TEST"]; !ok {
			return fmt.Errorf("stream not found")
		} else if len(sa.Group.Peers) != len(expectedPeers) {
			return fmt.Errorf("stream peer group size not %d, but %d", len(expectedPeers), len(sa.Group.Peers))
		} else if da, ok := sa.consumers["DUR"]; !ok {
			return fmt.Errorf("durable not found")
		} else if len(da.Group.Peers) != len(expectedPeers) {
			return fmt.Errorf("durable peer group size not %d, but %d", len(expectedPeers), len(da.Group.Peers))
		} else if ea, ok := sa.consumers[ephName]; !ok {
			return fmt.Errorf("ephemeral not found")
		} else if len(ea.Group.Peers) != 1 {
			return fmt.Errorf("ephemeral peer group size not 1, but %d", len(ea.Group.Peers))
		} else if _, ok := expectedPeers[ea.Group.Peers[0]]; !ok {
			return fmt.Errorf("ephemeral peer not an expected peer")
		} else {
			for _, p := range sa.Group.Peers {
				if _, ok := expectedPeers[p]; !ok {
					return fmt.Errorf("peer not expected")
				}
				found := false
				for _, dp := range da.Group.Peers {
					if p == dp {
						found = true
						break
					}
				}
				if !found {
					t.Logf("durable peer group does not match stream peer group")
				}
			}
		}
		return nil
	}

	ncsys, err := nats.Connect(srv.ClientURL(), nats.UserInfo("admin", "s3cr3t!"))
	require_NoError(t, err)
	defer ncsys.Close()

	time.Sleep(2 * usageTick)
	aiBefore, err := js.AccountInfo()
	require_NoError(t, err)

	for _, moveFromSrv := range streamPeerSrv {
		moveReq, err := json.Marshal(&JSApiMetaServerStreamMoveRequest{Server: moveFromSrv, Tags: []string{emptySrv}})
		require_NoError(t, err)
		rmsg, err := ncsys.Request(fmt.Sprintf(JSApiServerStreamMoveT, "$G", "TEST"), moveReq, 5*time.Second)
		require_NoError(t, err)
		var moveResp JSApiStreamUpdateResponse
		require_NoError(t, json.Unmarshal(rmsg.Data, &moveResp))
		require_True(t, moveResp.Error == nil)

		rmsg, err = ncsys.Request(fmt.Sprintf(JSApiServerStreamCancelMoveT, "$G", "TEST"), nil, 5*time.Second)
		require_NoError(t, err)
		var cancelResp JSApiStreamUpdateResponse
		require_NoError(t, json.Unmarshal(rmsg.Data, &cancelResp))
		if cancelResp.Error != nil && ErrorIdentifier(cancelResp.Error.ErrCode) == JSStreamMoveNotInProgress {
			t.Skip("This can happen with delays, when Move completed before Cancel", cancelResp.Error)
			return
		}
		require_True(t, cancelResp.Error == nil)

		for _, sExpected := range streamPeerSrv {
			s := c.serverByName(sExpected)
			require_True(t, s.JetStreamIsStreamAssigned("$G", "TEST"))
			checkFor(t, 20*time.Second, 100*time.Millisecond, func() error { return checkSrvInvariant(s, expectedPeers) })
		}
		checkFor(t, 10*time.Second, 100*time.Millisecond, func() error { return serverEmpty(emptySrv) })
		checkFor(t, 3*usageTick, 100*time.Millisecond, func() error {
			if aiAfter, err := js.AccountInfo(); err != nil {
				return err
			} else if aiAfter.Store != aiBefore.Store {
				return fmt.Errorf("store before %d and after %d don't match", aiBefore.Store, aiAfter.Store)
			} else {
				return nil
			}
		})
	}
}

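// Repeatedly moves a stream between the four servers of one cluster, checking
// after each move that the peer list order is stable, that the durable and
// ephemeral consumers follow the stream, and that the vacated server drains
// completely.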
func TestJetStreamSuperClusterDoubleStreamMove(t *testing.T) {
	server := map[string]struct{}{}
	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, jsClusterTempl, 4, 2,
		func(serverName, clusterName, storeDir, conf string) string {
			server[serverName] = struct{}{}
			return fmt.Sprintf("%s\nserver_tags: [%s]", conf, serverName)
		}, nil)
	defer sc.shutdown()

	// Client based API
	c := sc.randomCluster()
	srv := c.randomNonLeader()
	nc, js := jsClientConnect(t, srv)
	defer nc.Close()

	siCreate, err := js.AddStream(&nats.StreamConfig{
		Name:     "TEST",
		Subjects: []string{"foo"},
		Replicas: 3,
	})
	require_NoError(t, err)
	srvMoveList := []string{siCreate.Cluster.Leader, siCreate.Cluster.Replicas[0].Name, siCreate.Cluster.Replicas[1].Name}
	// determine empty server
	for _, s := range srvMoveList {
		delete(server, s)
	}
	// pick the left over server in the same cluster as the other servers
	for s := range server {
		// server name is prefixed with cluster name
		if strings.HasPrefix(s, c.name) {
			srvMoveList = append(srvMoveList, s)
			break
		}
	}

	servers := []*Server{
		c.serverByName(srvMoveList[0]),
		c.serverByName(srvMoveList[1]),
		c.serverByName(srvMoveList[2]),
		c.serverByName(srvMoveList[3]), // starts out empty
	}

	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "DUR", AckPolicy: nats.AckExplicitPolicy})
	require_NoError(t, err)
	ci, err := js.AddConsumer("TEST", &nats.ConsumerConfig{InactiveThreshold: time.Hour, AckPolicy: nats.AckExplicitPolicy})
	require_NoError(t, err)
	ephName := ci.Name

	toSend := uint64(100)
	for i := uint64(0); i < toSend; i++ {
		_, err = js.Publish("foo", nil)
		require_NoError(t, err)
	}

	ncsys, err := nats.Connect(srv.ClientURL(), nats.UserInfo("admin", "s3cr3t!"))
	require_NoError(t, err)
	defer ncsys.Close()

	move := func(fromSrv string, toTags ...string) {
		sEmpty := c.serverByName(fromSrv)
		jszBefore, err := sEmpty.Jsz(nil)
		require_NoError(t, err)
		require_True(t, jszBefore.Streams == 1)

		moveReq, err := json.Marshal(&JSApiMetaServerStreamMoveRequest{
			Server: fromSrv, Tags: toTags})
		require_NoError(t, err)
		rmsg, err := ncsys.Request(fmt.Sprintf(JSApiServerStreamMoveT, "$G", "TEST"), moveReq, 100*time.Second)
		require_NoError(t, err)
		var moveResp JSApiStreamUpdateResponse
		require_NoError(t, json.Unmarshal(rmsg.Data, &moveResp))
		require_True(t, moveResp.Error == nil)
	}

	serverEmpty := func(fromSrv string) error {
		if jszAfter, err := c.serverByName(fromSrv).Jsz(nil); err != nil {
			return fmt.Errorf("could not fetch JS info for server: %v", err)
		} else if jszAfter.Streams != 0 {
			return fmt.Errorf("empty server still has %d streams", jszAfter.Streams)
		} else if jszAfter.Consumers != 0 {
			return fmt.Errorf("empty server still has %d consumers", jszAfter.Consumers)
		} else if jszAfter.Store != 0 {
			return fmt.Errorf("empty server still has %d bytes of storage", jszAfter.Store)
		}
		return nil
	}

	moveComplete := func(toSrv string, expectedSet ...string) error {
		eSet := map[string]int{}
		foundInExpected := false
		for i, sExpected := range expectedSet {
			eSet[sExpected] = i
			s := c.serverByName(sExpected)
			if !s.JetStreamIsStreamAssigned("$G", "TEST") {
				return fmt.Errorf("expected stream to be assigned to %s", sExpected)
			}
			// test list order invariant
			js, cc := s.getJetStreamCluster()
			sExpHash := getHash(sExpected)
			js.mu.Lock()
			if sa, ok := cc.streams["$G"]["TEST"]; !ok {
				js.mu.Unlock()
				return fmt.Errorf("stream not found in cluster")
			} else if len(sa.Group.Peers) != 3 {
				js.mu.Unlock()
				return fmt.Errorf("peers not reset")
			} else if sa.Group.Peers[i] != sExpHash {
				js.mu.Unlock()
				return fmt.Errorf("stream: expected peer %s on index %d, got %s/%s",
					sa.Group.Peers[i], i, sExpHash, sExpected)
			} else if ca, ok := sa.consumers["DUR"]; !ok {
				js.mu.Unlock()
				return fmt.Errorf("durable not found in stream")
			} else {
				found := false
				for _, peer := range ca.Group.Peers {
					if peer == sExpHash {
						found = true
						break
					}
				}
				if !found {
					js.mu.Unlock()
					return fmt.Errorf("consumer expected peer %s/%s but didn't find it in %+v",
						sExpHash, sExpected, ca.Group.Peers)
				}
				if ephA, ok := sa.consumers[ephName]; ok {
					if len(ephA.Group.Peers) != 1 {
						js.mu.Unlock()
						return fmt.Errorf("ephemeral peers not reset")
					}
					foundInExpected = foundInExpected || (ephA.Group.Peers[0] == cc.meta.ID())
				}
			}
			js.mu.Unlock()
		}
		if len(expectedSet) > 0 && !foundInExpected {
			return fmt.Errorf("ephemeral peer not expected")
		}
		for _, s := range servers {
			if jszAfter, err := c.serverByName(toSrv).Jsz(nil); err != nil {
				return fmt.Errorf("could not fetch JS info for server: %v", err)
			} else if jszAfter.Messages != toSend {
				return fmt.Errorf("messages not yet copied, got %d, expected %d", jszAfter.Messages, toSend)
			}
			nc, js := jsClientConnect(t, s)
			defer nc.Close()
			if si, err := js.StreamInfo("TEST", nats.MaxWait(time.Second)); err != nil {
				return fmt.Errorf("could not fetch stream info: %v", err)
			} else if len(si.Cluster.Replicas)+1 != si.Config.Replicas {
				return fmt.Errorf("not yet downsized, have %d replicas with leader %s",
					len(si.Cluster.Replicas), si.Cluster.Leader)
			} else if si.Cluster.Leader == _EMPTY_ {
				return fmt.Errorf("leader not found")
			} else if len(expectedSet) > 0 {
				if _, ok := eSet[si.Cluster.Leader]; !ok {
					return fmt.Errorf("leader %s not in expected set %+v", si.Cluster.Leader, eSet)
				} else if _, ok := eSet[si.Cluster.Replicas[0].Name]; !ok {
					return fmt.Errorf("replica %s not in expected set %+v", si.Cluster.Replicas[0].Name, eSet)
				} else if _, ok := eSet[si.Cluster.Replicas[1].Name]; !ok {
					return fmt.Errorf("replica %s not in expected set %+v", si.Cluster.Replicas[1].Name, eSet)
				}
			}
			nc.Close()
		}
		return nil
	}

	moveAndCheck := func(from, to string, expectedSet ...string) {
		t.Helper()
		move(from, to)
		checkFor(t, 40*time.Second, 100*time.Millisecond, func() error { return moveComplete(to, expectedSet...) })
		checkFor(t, 20*time.Second, 100*time.Millisecond, func() error { return serverEmpty(from) })
	}

	checkFor(t, 20*time.Second, 1000*time.Millisecond, func() error { return serverEmpty(srvMoveList[3]) })
	// first iteration establishes order of servers 0-2 (the internal order in the server could be 1,0,2)
	moveAndCheck(srvMoveList[0], srvMoveList[3])
	moveAndCheck(srvMoveList[1], srvMoveList[0])
	moveAndCheck(srvMoveList[2], srvMoveList[1])
	moveAndCheck(srvMoveList[3], srvMoveList[2], srvMoveList[0], srvMoveList[1], srvMoveList[2])
	// second iteration iterates in order
	moveAndCheck(srvMoveList[0], srvMoveList[3], srvMoveList[1], srvMoveList[2], srvMoveList[3])
	moveAndCheck(srvMoveList[1], srvMoveList[0], srvMoveList[2], srvMoveList[3], srvMoveList[0])
	moveAndCheck(srvMoveList[2], srvMoveList[1], srvMoveList[3], srvMoveList[0], srvMoveList[1])
	moveAndCheck(srvMoveList[3], srvMoveList[2], srvMoveList[0], srvMoveList[1], srvMoveList[2])
	// iterate in the opposite direction and establish order 2-0
	moveAndCheck(srvMoveList[2], srvMoveList[3], srvMoveList[0], srvMoveList[1], srvMoveList[3])
	moveAndCheck(srvMoveList[1], srvMoveList[2], srvMoveList[0], srvMoveList[3], srvMoveList[2])
	moveAndCheck(srvMoveList[0], srvMoveList[1], srvMoveList[3], srvMoveList[2], srvMoveList[1])
	moveAndCheck(srvMoveList[3], srvMoveList[0], srvMoveList[2], srvMoveList[1], srvMoveList[0])
	// move server in the middle of list
	moveAndCheck(srvMoveList[1], srvMoveList[3], srvMoveList[2], srvMoveList[0], srvMoveList[3])
	moveAndCheck(srvMoveList[0], srvMoveList[1], srvMoveList[2], srvMoveList[3], srvMoveList[1])
	moveAndCheck(srvMoveList[3], srvMoveList[0], srvMoveList[2], srvMoveList[1], srvMoveList[0])
	// repeatedly use end
	moveAndCheck(srvMoveList[0], srvMoveList[3], srvMoveList[2], srvMoveList[1], srvMoveList[3])
	moveAndCheck(srvMoveList[3], srvMoveList[0], srvMoveList[2], srvMoveList[1], srvMoveList[0])
	moveAndCheck(srvMoveList[0], srvMoveList[3], srvMoveList[2], srvMoveList[1], srvMoveList[3])
	moveAndCheck(srvMoveList[3], srvMoveList[0], srvMoveList[2], srvMoveList[1], srvMoveList[0])
}

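// Evacuates a peer (addressed by server name or by tag) and verifies that the
// stream and its consumers are reassigned, that the origin server drains
// completely, and that the stored data survives the move.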
func TestJetStreamSuperClusterPeerEvacuationAndStreamReassignment(t *testing.T) {
	s := createJetStreamSuperClusterWithTemplateAndModHook(t, jsClusterTempl, 4, 2,
		func(serverName, clusterName, storeDir, conf string) string {
			return fmt.Sprintf("%s\nserver_tags: [cluster:%s, server:%s]", conf, clusterName, serverName)
		}, nil)
	defer s.shutdown()

	c := s.clusterForName("C1")

	// Client based API
	srv := c.randomNonLeader()
	nc, js := jsClientConnect(t, srv)
	defer nc.Close()

	test := func(t *testing.T, r int, moveTags []string, targetCluster string, testMigrateTo bool, listFrom bool) {
		si, err := js.AddStream(&nats.StreamConfig{
			Name:     "TEST",
			Subjects: []string{"foo"},
			Replicas: r,
		})
		require_NoError(t, err)
		defer js.DeleteStream("TEST")
		startSet := map[string]struct{}{
			si.Cluster.Leader: {},
		}
		for _, p := range si.Cluster.Replicas {
			startSet[p.Name] = struct{}{}
		}

		_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{Durable: "DUR", AckPolicy: nats.AckExplicitPolicy})
		require_NoError(t, err)

		sub, err := js.SubscribeSync("foo")
		require_NoError(t, err)

		for i := 0; i < 100; i++ {
			_, err = js.Publish("foo", nil)
			require_NoError(t, err)
		}

		toMoveFrom := si.Cluster.Leader
		if !listFrom {
			toMoveFrom = _EMPTY_
		}
		sLdr := c.serverByName(si.Cluster.Leader)
		jszBefore, err := sLdr.Jsz(nil)
		require_NoError(t, err)
		require_True(t, jszBefore.Streams == 1)
		require_True(t, jszBefore.Consumers >= 1)
		require_True(t, jszBefore.Store != 0)

		migrateToServer := _EMPTY_
		if testMigrateTo {
			// find an empty server
			for _, s := range c.servers {
				name := s.Name()
				found := si.Cluster.Leader == name
				if !found {
					for _, r := range si.Cluster.Replicas {
						if r.Name == name {
							found = true
							break
						}
					}
				}
				if !found {
					migrateToServer = name
					break
				}
			}
			jszAfter, err := c.serverByName(migrateToServer).Jsz(nil)
			require_NoError(t, err)
			require_True(t, jszAfter.Streams == 0)

			moveTags = append(moveTags, fmt.Sprintf("server:%s", migrateToServer))
		}

		ncsys, err := nats.Connect(srv.ClientURL(), nats.UserInfo("admin", "s3cr3t!"))
		require_NoError(t, err)
		defer ncsys.Close()

		moveReq, err := json.Marshal(&JSApiMetaServerStreamMoveRequest{Server: toMoveFrom, Tags: moveTags})
		require_NoError(t, err)
		rmsg, err := ncsys.Request(fmt.Sprintf(JSApiServerStreamMoveT, "$G", "TEST"), moveReq, 100*time.Second)
		require_NoError(t, err)
		var moveResp JSApiStreamUpdateResponse
		require_NoError(t, json.Unmarshal(rmsg.Data, &moveResp))
		require_True(t, moveResp.Error == nil)

		// test move to particular server
		if testMigrateTo {
			toSrv := c.serverByName(migrateToServer)
			checkFor(t, 20*time.Second, 1000*time.Millisecond, func() error {
				jszAfter, err := toSrv.Jsz(nil)
				if err != nil {
					return fmt.Errorf("could not fetch JS info for server: %v", err)
				}
				if jszAfter.Streams != 1 {
					return fmt.Errorf("server expected to have one stream, has %d", jszAfter.Streams)
				}
				return nil
			})
		}
		// Now wait until the stream is current.
		checkFor(t, 50*time.Second, 100*time.Millisecond, func() error {
			si, err := js.StreamInfo("TEST", nats.MaxWait(time.Second))
			if err != nil {
				return fmt.Errorf("could not fetch stream info: %v", err)
			}
			if si.Cluster.Leader == toMoveFrom {
				return fmt.Errorf("peer not removed yet: %+v", toMoveFrom)
			}
			if si.Cluster.Leader == _EMPTY_ {
				return fmt.Errorf("no leader yet")
			}
			if len(si.Cluster.Replicas) != r-1 {
				return fmt.Errorf("not yet downsized, replicas should be %d but have: %d", r-1, len(si.Cluster.Replicas))
			}
			if si.Config.Replicas != r {
				return fmt.Errorf("bad replica count %d", si.Config.Replicas)
			}
			if si.Cluster.Name != targetCluster {
				return fmt.Errorf("stream expected in %s but found in %s", targetCluster, si.Cluster.Name)
			}
			sNew := s.serverByName(si.Cluster.Leader)
			if jszNew, err := sNew.Jsz(nil); err != nil {
				return err
			} else if jszNew.Streams != 1 {
				return fmt.Errorf("new leader has %d streams, not one", jszNew.Streams)
			} else if jszNew.Store != jszBefore.Store {
				return fmt.Errorf("new leader has %d bytes of storage, should have %d", jszNew.Store, jszBefore.Store)
			}
			return nil
		})
		// test draining
		checkFor(t, 20*time.Second, time.Second, func() error {
			if !listFrom {
				// when needed, determine which server the move went away from
				si, err := js.StreamInfo("TEST", nats.MaxWait(2*time.Second))
				if err != nil {
					return fmt.Errorf("could not fetch stream info: %v", err)
				}
				for n := range startSet {
					if n != si.Cluster.Leader {
						var found bool
						for _, p := range si.Cluster.Replicas {
							if p.Name == n {
								found = true
								break
							}
						}
						if !found {
							toMoveFrom = n
						}
					}
				}
			}
			if toMoveFrom == _EMPTY_ {
				return fmt.Errorf("server to move away from not found")
			}
			sEmpty := c.serverByName(toMoveFrom)
			jszAfter, err := sEmpty.Jsz(nil)
			if err != nil {
				return fmt.Errorf("could not fetch JS info for server: %v", err)
			}
			if jszAfter.Streams != 0 {
				return fmt.Errorf("empty server still has %d streams", jszAfter.Streams)
			}
			if jszAfter.Consumers != 0 {
				return fmt.Errorf("empty server still has %d consumers", jszAfter.Consumers)
			}
			if jszAfter.Store != 0 {
				return fmt.Errorf("empty server still has %d bytes of storage", jszAfter.Store)
			}
			return nil
		})
		// consume messages from ephemeral consumer
		for i := 0; i < 100; i++ {
			_, err := sub.NextMsg(time.Second)
			require_NoError(t, err)
		}
	}

	for i := 1; i <= 3; i++ {
		t.Run(fmt.Sprintf("r%d", i), func(t *testing.T) {
			test(t, i, nil, "C1", false, true)
		})
		t.Run(fmt.Sprintf("r%d-explicit", i), func(t *testing.T) {
			test(t, i, nil, "C1", true, true)
		})
		t.Run(fmt.Sprintf("r%d-nosrc", i), func(t *testing.T) {
			test(t, i, nil, "C1", false, false)
		})
	}

	t.Run("r3-cluster-move", func(t *testing.T) {
		test(t, 3, []string{"cluster:C2"}, "C2", false, false)
	})
	t.Run("r3-cluster-move-nosrc", func(t *testing.T) {
		test(t, 3, []string{"cluster:C2"}, "C2", false, true)
	})
}

func TestJetStreamSuperClusterMirrorInheritsAllowDirect(t *testing.T) {
	sc := createJetStreamTaggedSuperCluster(t)
	defer sc.shutdown()

	nc, js := jsClientConnect(t, sc.randomServer())
	defer nc.Close()

	_, err := js.AddStream(&nats.StreamConfig{
		Name:              "KV",
		Subjects:          []string{"key.*"},
		Placement:         &nats.Placement{Tags: []string{"cloud:aws", "country:us"}},
		MaxMsgsPerSubject: 1,
		AllowDirect:       true,
	})
	require_NoError(t, err)

	_, err = js.AddStream(&nats.StreamConfig{
		Name:      "M",
		Mirror:    &nats.StreamSource{Name: "KV"},
		Placement: &nats.Placement{Tags: []string{"cloud:gcp", "country:uk"}},
	})
	require_NoError(t, err)

	// Do direct grab for now.
	resp, err := nc.Request(fmt.Sprintf(JSApiStreamInfoT, "M"), nil, time.Second)
	require_NoError(t, err)
	var si StreamInfo
	err = json.Unmarshal(resp.Data, &si)
	require_NoError(t, err)

	if !si.Config.MirrorDirect {
		t.Fatalf("Expected MirrorDirect to be inherited as true")
	}
}

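// Placement should respect per-server storage limits: a stream whose MaxBytes
// exceeds what any candidate server can hold must be rejected. C1 is given
// the large limits and C2 the small ones via the storeCnf hook below.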
func TestJetStreamSuperClusterSystemLimitsPlacement(t *testing.T) {
|
|
const largeSystemLimit = 1024
|
|
const smallSystemLimit = 512
|
|
|
|
tmpl := `
|
|
listen: 127.0.0.1:-1
|
|
server_name: %s
|
|
jetstream: {
|
|
max_mem_store: _MAXMEM_
|
|
max_file_store: _MAXFILE_
|
|
store_dir: '%s',
|
|
}
|
|
server_tags: [
|
|
_TAG_
|
|
]
|
|
leaf {
|
|
listen: 127.0.0.1:-1
|
|
}
|
|
cluster {
|
|
name: %s
|
|
listen: 127.0.0.1:%d
|
|
routes = [%s]
|
|
}
|
|
|
|
accounts { $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } }
|
|
`
|
|
storeCnf := func(serverName, clusterName, storeDir, conf string) string {
|
|
switch {
|
|
case strings.HasPrefix(serverName, "C1"):
|
|
conf = strings.Replace(conf, "_MAXMEM_", fmt.Sprint(largeSystemLimit), 1)
|
|
conf = strings.Replace(conf, "_MAXFILE_", fmt.Sprint(largeSystemLimit), 1)
|
|
return strings.Replace(conf, "_TAG_", serverName, 1)
|
|
case strings.HasPrefix(serverName, "C2"):
|
|
conf = strings.Replace(conf, "_MAXMEM_", fmt.Sprint(smallSystemLimit), 1)
|
|
conf = strings.Replace(conf, "_MAXFILE_", fmt.Sprint(smallSystemLimit), 1)
|
|
return strings.Replace(conf, "_TAG_", serverName, 1)
|
|
default:
|
|
return conf
|
|
}
|
|
}
|
|
|
|
sCluster := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 3, 2, storeCnf, nil)
|
|
defer sCluster.shutdown()
|
|
|
|
requestLeaderStepDown := func(clientURL string) error {
|
|
nc, err := nats.Connect(clientURL)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer nc.Close()
|
|
|
|
ncResp, err := nc.Request(JSApiLeaderStepDown, nil, 3*time.Second)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var resp JSApiLeaderStepDownResponse
|
|
if err := json.Unmarshal(ncResp.Data, &resp); err != nil {
|
|
return err
|
|
}
|
|
if resp.Error != nil {
|
|
return resp.Error
|
|
}
|
|
if !resp.Success {
|
|
return fmt.Errorf("leader step down request not successful")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Force large cluster to be leader
|
|
var largeLeader *Server
|
|
err := checkForErr(15*time.Second, 500*time.Millisecond, func() error {
|
|
// Range over cluster A, which is the large cluster.
|
|
servers := sCluster.clusters[0].servers
|
|
for _, s := range servers {
|
|
if s.JetStreamIsLeader() {
|
|
largeLeader = s
|
|
return nil
|
|
}
|
|
}
|
|
|
|
if err := requestLeaderStepDown(servers[0].ClientURL()); err != nil {
|
|
return fmt.Errorf("failed to request leader step down: %s", err)
|
|
}
|
|
return fmt.Errorf("leader is not in large cluster")
|
|
})
|
|
if err != nil {
|
|
t.Skipf("failed to get desired layout: %s", err)
|
|
}
|
|
|
|
getStreams := func(jsm nats.JetStreamManager) []string {
|
|
var streams []string
|
|
for s := range jsm.StreamNames() {
|
|
streams = append(streams, s)
|
|
}
|
|
return streams
|
|
}
|
|
nc, js := jsClientConnect(t, largeLeader)
|
|
defer nc.Close()
|
|
|
|
cases := []struct {
|
|
name string
|
|
storage nats.StorageType
|
|
createMaxBytes int64
|
|
serverTag string
|
|
wantErr bool
|
|
}{
|
|
{
|
|
name: "file create large stream on small cluster b0",
|
|
storage: nats.FileStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C2-S1",
|
|
wantErr: true,
|
|
},
|
|
{
|
|
name: "memory create large stream on small cluster b0",
|
|
storage: nats.MemoryStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C2-S1",
|
|
wantErr: true,
|
|
},
|
|
{
|
|
name: "file create large stream on small cluster b1",
|
|
storage: nats.FileStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C2-S2",
|
|
wantErr: true,
|
|
},
|
|
{
|
|
name: "memory create large stream on small cluster b1",
|
|
storage: nats.MemoryStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C2-S2",
|
|
wantErr: true,
|
|
},
|
|
{
|
|
name: "file create large stream on small cluster b2",
|
|
storage: nats.FileStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C2-S3",
|
|
wantErr: true,
|
|
},
|
|
{
|
|
name: "memory create large stream on small cluster b2",
|
|
storage: nats.MemoryStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C2-S3",
|
|
wantErr: true,
|
|
},
|
|
{
|
|
name: "file create large stream on large cluster a0",
|
|
storage: nats.FileStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C1-S1",
|
|
},
|
|
{
|
|
name: "memory create large stream on large cluster a0",
|
|
storage: nats.MemoryStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C1-S1",
|
|
},
|
|
{
|
|
name: "file create large stream on large cluster a1",
|
|
storage: nats.FileStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C1-S2",
|
|
},
|
|
{
|
|
name: "memory create large stream on large cluster a1",
|
|
storage: nats.MemoryStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C1-S2",
|
|
},
|
|
{
|
|
name: "file create large stream on large cluster a2",
|
|
storage: nats.FileStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C1-S3",
|
|
},
|
|
{
|
|
name: "memory create large stream on large cluster a2",
|
|
storage: nats.MemoryStorage,
|
|
createMaxBytes: smallSystemLimit + 1,
|
|
serverTag: "C1-S3",
|
|
},
|
|
}

	for i := 0; i < len(cases) && !t.Failed(); i++ {
		c := cases[i]
		t.Run(c.name, func(st *testing.T) {
			// Server tags are of the form "C1-S1"/"C2-S1", so derive the
			// placement cluster from the tag prefix.
			var clusterName string
			if strings.HasPrefix(c.serverTag, "C1") {
				clusterName = "C1"
			} else if strings.HasPrefix(c.serverTag, "C2") {
				clusterName = "C2"
			}

			if s := getStreams(js); len(s) != 0 {
				st.Fatalf("unexpected stream count, got=%d, want=0", len(s))
			}

			streamName := fmt.Sprintf("TEST-%s", c.serverTag)
			si, err := js.AddStream(&nats.StreamConfig{
				Name:     streamName,
				Subjects: []string{"foo"},
				Storage:  c.storage,
				MaxBytes: c.createMaxBytes,
				Placement: &nats.Placement{
					Cluster: clusterName,
					Tags:    []string{c.serverTag},
				},
			})
			if c.wantErr && err == nil {
				if s := getStreams(js); len(s) != 1 {
					st.Logf("unexpected stream count, got=%d, want=1, streams=%v", len(s), s)
				}

				cfg := si.Config
				st.Fatalf("unexpected success, maxBytes=%d, cluster=%s, tags=%v",
					cfg.MaxBytes, cfg.Placement.Cluster, cfg.Placement.Tags)
			} else if !c.wantErr && err != nil {
				if s := getStreams(js); len(s) != 0 {
					st.Logf("unexpected stream count, got=%d, want=0, streams=%v", len(s), s)
				}

				require_NoError(st, err)
			}

			if err == nil {
				if s := getStreams(js); len(s) != 1 {
					st.Fatalf("unexpected stream count, got=%d, want=1", len(s))
				}
			}
			// Delete the stream regardless of outcome before the next case runs.
			js.DeleteStream(streamName)
		})
	}
}
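
// listStreamNames is a minimal sketch of what the getStreams helper used by
// the placement subtests above is assumed to do: drain the StreamNames lister
// from the public nats.go API and collect the names. The name listStreamNames
// is hypothetical; the suite's own getStreams lives elsewhere in this file.
func listStreamNames(js nats.JetStreamContext) []string {
	var names []string
	// StreamNames returns a channel that is closed once all names are sent.
	for name := range js.StreamNames() {
		names = append(names, name)
	}
	return names
}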

func TestJetStreamSuperClusterMixedModeSwitchToInterestOnlyStaticConfig(t *testing.T) {
	tmpl := `
		listen: 127.0.0.1:-1
		server_name: %s
		jetstream: { domain: ngs, max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
		leaf: { listen: 127.0.0.1:-1 }
		cluster {
			name: %s
			listen: 127.0.0.1:%d
			routes = [%s]
		}
		accounts {
			ONE {
				users = [ { user: "one", pass: "pwd" } ]
				jetstream: enabled
			}
			TWO { users = [ { user: "two", pass: "pwd" } ] }
			$SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] }
		}
	`
	// Comment out JetStream on S4/S5 of each cluster so we run in mixed mode,
	// and disable the leafnode listen block everywhere else.
	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 5, 3,
		func(serverName, clusterName, storeDir, conf string) string {
			sname := serverName[strings.Index(serverName, "-")+1:]
			switch sname {
			case "S4", "S5":
				conf = strings.ReplaceAll(conf, "jetstream: { ", "#jetstream: { ")
			default:
				conf = strings.ReplaceAll(conf, "leaf: { ", "#leaf: { ")
			}
			return conf
		}, nil)
	defer sc.shutdown()

	// Connect our client to a non-JS server.
	c := sc.randomCluster()
	var s *Server
	for _, as := range c.servers {
		if !as.JetStreamEnabled() {
			s = as
			break
		}
	}
	if s == nil {
		t.Fatal("Did not find a non JS server!")
	}
	nc, js := jsClientConnect(t, s, nats.UserInfo("one", "pwd"))
	defer nc.Close()

	// Just create a stream and then make sure that all gateways have switched
	// to interest-only mode.
	si, err := js.AddStream(&nats.StreamConfig{Name: "interest", Replicas: 3})
	require_NoError(t, err)

	sc.waitOnStreamLeader("ONE", "interest")

	check := func(accName string) {
		t.Helper()
		for _, c := range sc.clusters {
			for _, s := range c.servers {
				// Check only JS servers' outbound GW connections.
				if !s.JetStreamEnabled() {
					continue
				}
				opts := s.getOpts()
				for _, gw := range opts.Gateway.Gateways {
					if gw.Name == opts.Gateway.Name {
						continue
					}
					checkGWInterestOnlyMode(t, s, gw.Name, accName)
				}
			}
		}
	}
	// Starting with v2.9.0, all accounts should be switched to interest-only mode.
	check("ONE")
	check("TWO")

	var gwsa [16]*client
	gws := gwsa[:0]

	s = sc.serverByName(si.Cluster.Leader)
	// Get the GW outbound connections and force them to reconnect, then
	// verify the mode survives the reconnect.
	s.getOutboundGatewayConnections(&gws)
	for _, gwc := range gws {
		gwc.mu.Lock()
		gwc.nc.Close()
		gwc.mu.Unlock()
	}
	waitForOutboundGateways(t, s, 2, 5*time.Second)
	check("ONE")
	check("TWO")
}
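
// Both mixed-mode tests in this section locate a non-JetStream server with
// the same inline loop. A hypothetical refactoring could hoist that lookup
// into a helper like this (firstNonJSServer is illustrative only):
func firstNonJSServer(t *testing.T, c *cluster) *Server {
	t.Helper()
	for _, s := range c.servers {
		if !s.JetStreamEnabled() {
			return s
		}
	}
	t.Fatal("Did not find a non JS server!")
	return nil
}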

func TestJetStreamSuperClusterMixedModeSwitchToInterestOnlyOperatorConfig(t *testing.T) {
	kp, _ := nkeys.FromSeed(oSeed)

	// Mint a system account and a JS-enabled account, both signed by the
	// operator key.
	skp, _ := nkeys.CreateAccount()
	spub, _ := skp.PublicKey()
	nac := jwt.NewAccountClaims(spub)
	sjwt, err := nac.Encode(kp)
	require_NoError(t, err)

	akp, _ := nkeys.CreateAccount()
	apub, _ := akp.PublicKey()
	nac = jwt.NewAccountClaims(apub)
	// Set some limits to enable JS.
	nac.Limits.JetStreamLimits.DiskStorage = 1024 * 1024
	nac.Limits.JetStreamLimits.Streams = 10
	ajwt, err := nac.Encode(kp)
	require_NoError(t, err)

	// Serve the account JWTs over HTTP, as a URL resolver would.
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if strings.HasSuffix(r.URL.Path, spub) {
			w.Write([]byte(sjwt))
		} else {
			w.Write([]byte(ajwt))
		}
	}))
	defer ts.Close()

	operator := fmt.Sprintf(`
		operator: %s
		resolver: URL("%s/ngs/v1/accounts/jwt/")
	`, ojwt, ts.URL)

	tmpl := `
		listen: 127.0.0.1:-1
		server_name: %s
		jetstream: { domain: ngs, max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}
		leaf: { listen: 127.0.0.1:-1 }
		cluster {
			name: %s
			listen: 127.0.0.1:%d
			routes = [%s]
		}
	` + operator
	sc := createJetStreamSuperClusterWithTemplateAndModHook(t, tmpl, 5, 3,
		func(serverName, clusterName, storeDir, conf string) string {
			conf = strings.ReplaceAll(conf, "system_account: \"$SYS\"", fmt.Sprintf("system_account: \"%s\"", spub))
			sname := serverName[strings.Index(serverName, "-")+1:]
			switch sname {
			case "S4", "S5":
				conf = strings.ReplaceAll(conf, "jetstream: { ", "#jetstream: { ")
			default:
				conf = strings.ReplaceAll(conf, "leaf: { ", "#leaf: { ")
			}
			return conf
		}, nil)
	defer sc.shutdown()

	// Connect our client to a non-JS server.
	c := sc.randomCluster()
	var s *Server
	for _, as := range c.servers {
		if !as.JetStreamEnabled() {
			s = as
			break
		}
	}
	if s == nil {
		t.Fatal("Did not find a non JS server!")
	}
	nc, js := jsClientConnect(t, s, createUserCreds(t, nil, akp))
	defer nc.Close()

	// Just create a stream and then make sure that all gateways have switched
	// to interest-only mode.
	si, err := js.AddStream(&nats.StreamConfig{Name: "interest", Replicas: 3})
	require_NoError(t, err)

	sc.waitOnStreamLeader(apub, "interest")

	check := func(s *Server) {
		opts := s.getOpts()
		for _, gw := range opts.Gateway.Gateways {
			if gw.Name == opts.Gateway.Name {
				continue
			}
			checkGWInterestOnlyMode(t, s, gw.Name, apub)
		}
	}
	s = sc.serverByName(si.Cluster.Leader)
	check(s)

	// Let's cause a leadership change and verify that it still works.
	_, err = nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "interest"), nil, time.Second)
	require_NoError(t, err)
	sc.waitOnStreamLeader(apub, "interest")

	si, err = js.StreamInfo("interest")
	require_NoError(t, err)
	s = sc.serverByName(si.Cluster.Leader)
	check(s)

	var gwsa [16]*client
	gws := gwsa[:0]
	// Get the GW outbound connections and force them to reconnect.
	s.getOutboundGatewayConnections(&gws)
	for _, gwc := range gws {
		gwc.mu.Lock()
		gwc.nc.Close()
		gwc.mu.Unlock()
	}
	waitForOutboundGateways(t, s, 2, 5*time.Second)
	check(s)
}
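
// decodeAndCheckLimits is an illustrative (hypothetical) helper showing how
// the account JWT minted above could be decoded and its JetStream limits
// inspected with the same nats-io/jwt/v2 API the test imports.
func decodeAndCheckLimits(t *testing.T, theJWT string, wantStreams int64) {
	t.Helper()
	claims, err := jwt.DecodeAccountClaims(theJWT)
	require_NoError(t, err)
	// A non-zero disk or memory limit is what marks the account as
	// JetStream-enabled; here we just spot-check the stream limit.
	if got := claims.Limits.JetStreamLimits.Streams; got != wantStreams {
		t.Fatalf("unexpected stream limit, got=%d, want=%d", got, wantStreams)
	}
}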

type captureGWRewriteLogger struct {
	DummyLogger
	ch chan string
}

func (l *captureGWRewriteLogger) Tracef(format string, args ...interface{}) {
	msg := fmt.Sprintf(format, args...)
	if strings.Contains(msg, "$JS.SNAPSHOT.ACK.TEST") && strings.Contains(msg, gwReplyPrefix) {
		select {
		case l.ch <- msg:
		default:
		}
	}
}
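
// Note: the select with an empty default above makes the channel send
// non-blocking, so a full buffer never stalls the server's trace path; the
// test below only needs the first matching trace line.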

func TestJetStreamSuperClusterGWReplyRewrite(t *testing.T) {
	sc := createJetStreamSuperCluster(t, 3, 2)
	defer sc.shutdown()

	nc, js := jsClientConnect(t, sc.serverByName("C1-S1"))
	defer nc.Close()

	_, err := js.AddStream(&nats.StreamConfig{
		Name:     "TEST",
		Subjects: []string{"foo"},
		Replicas: 3,
	})
	require_NoError(t, err)
	sc.waitOnStreamLeader(globalAccountName, "TEST")

	for i := 0; i < 10; i++ {
		sendStreamMsg(t, nc, "foo", "msg")
	}

	nc2, _ := jsClientConnect(t, sc.serverByName("C2-S2"))
	defer nc2.Close()

	// Enable tracing on the stream leader's outbound gateway connections so
	// the capture logger can observe the rewritten reply subject.
	s := sc.clusters[0].streamLeader(globalAccountName, "TEST")
	var gws []*client
	s.getOutboundGatewayConnections(&gws)
	for _, gw := range gws {
		gw.mu.Lock()
		gw.trace = true
		gw.mu.Unlock()
	}
	l := &captureGWRewriteLogger{ch: make(chan string, 1)}
	s.SetLogger(l, false, true)

	// Send a snapshot request through the gateway.
	sreq := &JSApiStreamSnapshotRequest{
		DeliverSubject: nats.NewInbox(),
		ChunkSize:      512,
	}
	natsSub(t, nc2, sreq.DeliverSubject, func(m *nats.Msg) {
		m.Respond(nil)
	})
	natsFlush(t, nc2)
	req, _ := json.Marshal(sreq)
	rmsg, err := nc2.Request(fmt.Sprintf(JSApiStreamSnapshotT, "TEST"), req, time.Second)
	require_NoError(t, err)
	var resp JSApiStreamSnapshotResponse
	err = json.Unmarshal(rmsg.Data, &resp)
	require_NoError(t, err)
	if resp.Error != nil {
		t.Fatalf("Unexpected error response: %+v", resp.Error)
	}

	// Now we just want to make sure that the reply has the gateway prefix.
	select {
	case <-l.ch:
	case <-time.After(10 * time.Second):
		t.Fatalf("Did not see a gateway-prefixed reply subject in the trace")
	}
}
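
// drainSnapshotChunks is a hypothetical sketch of how a client would normally
// consume the snapshot initiated above: chunks arrive on the deliver subject,
// each is acked with an empty reply for flow control, and a zero-length
// message is assumed to mark the end of the snapshot.
func drainSnapshotChunks(t *testing.T, nc *nats.Conn, deliver string) [][]byte {
	t.Helper()
	sub, err := nc.SubscribeSync(deliver)
	require_NoError(t, err)
	defer sub.Unsubscribe()

	var chunks [][]byte
	for {
		m, err := sub.NextMsg(5 * time.Second)
		require_NoError(t, err)
		m.Respond(nil) // flow-control ack
		if len(m.Data) == 0 {
			return chunks // empty message signals completion
		}
		chunks = append(chunks, m.Data)
	}
}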

func TestJetStreamSuperClusterGWOfflineStatus(t *testing.T) {
	orgEventsHBInterval := eventsHBInterval
	eventsHBInterval = 500 * time.Millisecond
	defer func() { eventsHBInterval = orgEventsHBInterval }()

	tmpl := `
		listen: 127.0.0.1:-1
		server_name: %s
		jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}

		gateway {
			name: "local"
			listen: 127.0.0.1:-1
		}

		cluster {
			name: %s
			listen: 127.0.0.1:%d
			routes = [%s]
		}

		accounts {
			SYS {
				users [{user: sys, password: pwd}]
			}
			ONE {
				jetstream: enabled
				users [{user: one, password: pwd}]
			}
		}
		system_account=SYS
	`
	c := createJetStreamClusterWithTemplate(t, tmpl, "local", 3)
	defer c.shutdown()

	var gwURLs string
	for i, s := range c.servers {
		if i > 0 {
			gwURLs += ","
		}
		gwURLs += `"nats://` + s.GatewayAddr().String() + `"`
	}

	tmpl2 := `
		listen: 127.0.0.1:-1
		server_name: %s
		jetstream: {max_mem_store: 256MB, max_file_store: 2GB, store_dir: '%s'}

		gateway {
			name: "remote"
			listen: 127.0.0.1:-1
			__remote__
		}

		cluster {
			name: %s
			listen: 127.0.0.1:%d
			routes = [%s]
		}

		accounts {
			SYS {
				users [{user: sys, password: pwd}]
			}
			ONE {
				jetstream: enabled
				users [{user: one, password: pwd}]
			}
		}
		system_account=SYS
	`
	c2 := createJetStreamClusterAndModHook(t, tmpl2, "remote", "R", 2, 16022, false,
		func(serverName, clusterName, storeDir, conf string) string {
			conf = strings.Replace(conf, "__remote__", fmt.Sprintf("gateways [ { name: 'local', urls: [%s] } ]", gwURLs), 1)
			return conf
		})
	defer c2.shutdown()

	for _, s := range c.servers {
		waitForOutboundGateways(t, s, 1, 2*time.Second)
	}
	for _, s := range c2.servers {
		waitForOutboundGateways(t, s, 1, 2*time.Second)
	}
	c.waitOnPeerCount(5)

	// Simulate going offline without sending the shutdown protocol.
	for _, s := range c2.servers {
		c := s.getOutboundGatewayConnection("local")
		c.setNoReconnect()
		c.mu.Lock()
		c.nc.Close()
		c.mu.Unlock()
	}
	c2.shutdown()

	// Both remote peers should eventually be reported offline in the meta info.
	checkFor(t, 10*time.Second, 100*time.Millisecond, func() error {
		var ok int
		for _, s := range c.servers {
			jsz, err := s.Jsz(nil)
			if err != nil {
				return err
			}
			for _, r := range jsz.Meta.Replicas {
				if (r.Name == "RS-1" || r.Name == "RS-2") && r.Offline {
					ok++
				}
			}
		}
		if ok != 2 {
			return fmt.Errorf("RS-1 or RS-2 still marked as online")
		}
		return nil
	})
}
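
// gatewayURLList is a hypothetical tidier variant of the gwURLs construction
// in the test above, collecting the quoted gateway URLs into a slice and
// joining them instead of tracking commas by hand.
func gatewayURLList(servers []*Server) string {
	urls := make([]string, 0, len(servers))
	for _, s := range servers {
		urls = append(urls, `"nats://`+s.GatewayAddr().String()+`"`)
	}
	return strings.Join(urls, ",")
}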