Refactor outbound queues, remove dynamic sizing, add buffer reuse

Also try to reduce flakiness of `TestClusterQueueSubs` and `TestCrossAccountServiceResponseTypes`
2026-04-02 03:38:42 -07:00 · 2022-12-20 16:47:31 +00:00
parent c0784bc363
commit f2bffec366
5 changed files with 263 additions and 258 deletions
--- a/server/client_test.go
+++ b/server/client_test.go
@@ -31,7 +31,6 @@ import (
 	"testing"
 	"time"

-	"crypto/rand"
 	"crypto/tls"

 	"github.com/nats-io/jwt/v2"
@@ -1484,7 +1483,11 @@ func TestWildcardCharsInLiteralSubjectWorks(t *testing.T) {
 	}
 }

-func TestDynamicBuffers(t *testing.T) {
+// This test ensures that coalescing into the fixed-size output
+// queues works as expected. When bytes are queued up, they should
+// not overflow a buffer until the capacity is exceeded, at which
+// point a new buffer should be added.
+func TestClientOutboundQueueCoalesce(t *testing.T) {
 	opts := DefaultOptions()
 	s := RunServer(opts)
 	defer s.Shutdown()
@@ -1495,139 +1498,114 @@ func TestDynamicBuffers(t *testing.T) {
 	}
 	defer nc.Close()

-	// Grab the client from server.
-	s.mu.Lock()
-	lc := len(s.clients)
-	c := s.clients[s.gcid]
-	s.mu.Unlock()
-
-	if lc != 1 {
-		t.Fatalf("Expected only 1 client but got %d\n", lc)
+	clients := s.GlobalAccount().getClients()
+	if len(clients) != 1 {
+		t.Fatal("Expecting a client to exist")
 	}
-	if c == nil {
-		t.Fatal("Expected to retrieve client\n")
+	client := clients[0]
+	client.mu.Lock()
+	defer client.mu.Unlock()
+
+	// First up, queue something small into the queue.
+	client.queueOutbound([]byte{1, 2, 3, 4, 5})
+
+	if len(client.out.nb) != 1 {
+		t.Fatal("Expecting a single queued buffer")
+	}
+	if l := len(client.out.nb[0]); l != 5 {
+		t.Fatalf("Expecting only 5 bytes in the first queued buffer, found %d instead", l)
 	}

-	// Create some helper functions and data structures.
-	done := make(chan bool)            // Used to stop recording.
-	type maxv struct{ rsz, wsz int32 } // Used to hold max values.
-	results := make(chan maxv)
+	// Then queue up a few more bytes, but not enough
+	// to overflow into the next buffer.
+	client.queueOutbound([]byte{6, 7, 8, 9, 10})

-	// stopRecording stops the recording ticker and releases go routine.
-	stopRecording := func() maxv {
-		done <- true
-		return <-results
+	if len(client.out.nb) != 1 {
+		t.Fatal("Expecting a single queued buffer")
 	}
-	// max just grabs max values.
-	max := func(a, b int32) int32 {
-		if a > b {
-			return a
+	if l := len(client.out.nb[0]); l != 10 {
+		t.Fatalf("Expecting 10 bytes in the first queued buffer, found %d instead", l)
+	}
+
+	// Finally, queue up something that is guaranteed
+	// to overflow.
+	b := nbPoolSmall.Get().(*[nbPoolSizeSmall]byte)[:]
+	b = b[:cap(b)]
+	client.queueOutbound(b)
+	if len(client.out.nb) != 2 {
+		t.Fatal("Expecting buffer to have overflowed")
+	}
+	if l := len(client.out.nb[0]); l != cap(b) {
+		t.Fatalf("Expecting %d bytes in the first queued buffer, found %d instead", cap(b), l)
+	}
+	if l := len(client.out.nb[1]); l != 10 {
+		t.Fatalf("Expecting 10 bytes in the second queued buffer, found %d instead", l)
+	}
+}
+
+// This test ensures that outbound queues don't cause a run on
+// memory when sending something to lots of clients.
+func TestClientOutboundQueueMemory(t *testing.T) {
+	opts := DefaultOptions()
+	s := RunServer(opts)
+	defer s.Shutdown()
+
+	var before runtime.MemStats
+	var after runtime.MemStats
+
+	var err error
+	clients := make([]*nats.Conn, 50000)
+	wait := &sync.WaitGroup{}
+	wait.Add(len(clients))
+
+	for i := 0; i < len(clients); i++ {
+		clients[i], err = nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port), nats.InProcessServer(s))
+		if err != nil {
+			t.Fatalf("Error on connect: %v", err)
 		}
-		return b
-	}
-	// Returns current value of the buffer sizes.
-	getBufferSizes := func() (int32, int32) {
-		c.mu.Lock()
-		defer c.mu.Unlock()
-		return c.in.rsz, c.out.sz
-	}
-	// Record the max values seen.
-	recordMaxBufferSizes := func() {
-		ticker := time.NewTicker(10 * time.Microsecond)
-		defer ticker.Stop()
+		defer clients[i].Close()

-		var m maxv
+		clients[i].Subscribe("test", func(m *nats.Msg) {
+			wait.Done()
+		})
+	}

-		recordMax := func() {
-			rsz, wsz := getBufferSizes()
-			m.rsz = max(m.rsz, rsz)
-			m.wsz = max(m.wsz, wsz)
+	runtime.GC()
+	runtime.ReadMemStats(&before)
+
+	nc, err := nats.Connect(fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port), nats.InProcessServer(s))
+	if err != nil {
+		t.Fatalf("Error on connect: %v", err)
+	}
+	defer nc.Close()
+
+	var m [48000]byte
+	if err = nc.Publish("test", m[:]); err != nil {
+		t.Fatal(err)
+	}
+
+	wait.Wait()
+
+	runtime.GC()
+	runtime.ReadMemStats(&after)
+
+	hb, ha := float64(before.HeapAlloc), float64(after.HeapAlloc)
+	ms := float64(len(m))
+	diff := float64(ha) - float64(hb)
+	inc := (diff / float64(hb)) * 100
+
+	fmt.Printf("Message size:       %.1fKB\n", ms/1024)
+	fmt.Printf("Subscribed clients: %d\n", len(clients))
+	fmt.Printf("Heap allocs before: %.1fMB\n", hb/1024/1024)
+	fmt.Printf("Heap allocs after:  %.1fMB\n", ha/1024/1024)
+	fmt.Printf("Heap allocs delta:  %.1f%%\n", inc)
+
+	// TODO: What threshold makes sense here for a failure?
+	/*
+		if inc > 10 {
+			t.Fatalf("memory increase was %.1f%% (should be <= 10%%)", inc)
 		}
-
-		for {
-			select {
-			case <-done:
-				recordMax()
-				results <- m
-				return
-			case <-ticker.C:
-				recordMax()
-			}
-		}
-	}
-	// Check that the current value is what we expected.
-	checkBuffers := func(ers, ews int32) {
-		t.Helper()
-		rsz, wsz := getBufferSizes()
-		if rsz != ers {
-			t.Fatalf("Expected read buffer of %d, but got %d\n", ers, rsz)
-		}
-		if wsz != ews {
-			t.Fatalf("Expected write buffer of %d, but got %d\n", ews, wsz)
-		}
-	}
-
-	// Check that the max was as expected.
-	checkResults := func(m maxv, rsz, wsz int32) {
-		t.Helper()
-		if rsz != m.rsz {
-			t.Fatalf("Expected read buffer of %d, but got %d\n", rsz, m.rsz)
-		}
-		if wsz != m.wsz {
-			t.Fatalf("Expected write buffer of %d, but got %d\n", wsz, m.wsz)
-		}
-	}
-
-	// Here is where testing begins..
-
-	// Should be at or below the startBufSize for both.
-	rsz, wsz := getBufferSizes()
-	if rsz > startBufSize {
-		t.Fatalf("Expected read buffer of <= %d, but got %d\n", startBufSize, rsz)
-	}
-	if wsz > startBufSize {
-		t.Fatalf("Expected write buffer of <= %d, but got %d\n", startBufSize, wsz)
-	}
-
-	// Send some data.
-	data := make([]byte, 2048)
-	rand.Read(data)
-
-	go recordMaxBufferSizes()
-	for i := 0; i < 200; i++ {
-		nc.Publish("foo", data)
-	}
-	nc.Flush()
-	m := stopRecording()
-
-	if m.rsz != maxBufSize && m.rsz != maxBufSize/2 {
-		t.Fatalf("Expected read buffer of %d or %d, but got %d\n", maxBufSize, maxBufSize/2, m.rsz)
-	}
-	if m.wsz > startBufSize {
-		t.Fatalf("Expected write buffer of <= %d, but got %d\n", startBufSize, m.wsz)
-	}
-
-	// Create Subscription to test outbound buffer from server.
-	nc.Subscribe("foo", func(m *nats.Msg) {
-		// Just eat it..
-	})
-	go recordMaxBufferSizes()
-
-	for i := 0; i < 200; i++ {
-		nc.Publish("foo", data)
-	}
-	nc.Flush()
-
-	m = stopRecording()
-	checkResults(m, maxBufSize, maxBufSize)
-
-	// Now test that we shrink correctly.
-
-	// Should go to minimum for both..
-	for i := 0; i < 20; i++ {
-		nc.Flush()
-	}
-	checkBuffers(minBufSize, minBufSize)
+	*/
 }

 func TestClientTraceRace(t *testing.T) {
@@ -2246,7 +2224,6 @@ func TestFlushOutboundNoSliceReuseIfPartial(t *testing.T) {
 		expected.Write(buf)
 		c.mu.Lock()
 		c.queueOutbound(buf)
-		c.out.sz = 10
 		c.flushOutbound()
 		fakeConn.partial = false
 		c.mu.Unlock()