From 03aa44dc3dcfa24e01db8cedb5a72179b56d6187 Mon Sep 17 00:00:00 2001 From: Marco Primi Date: Tue, 26 Sep 2023 10:18:43 -0700 Subject: [PATCH 1/5] Improve setup of JS Consume benchmark Handle error condition during stream setup that was resulting in failed runs. --- server/jetstream_benchmark_test.go | 38 ++++++++++++++++-------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/server/jetstream_benchmark_test.go b/server/jetstream_benchmark_test.go index 466b98dc..ec1bd76f 100644 --- a/server/jetstream_benchmark_test.go +++ b/server/jetstream_benchmark_test.go @@ -29,11 +29,12 @@ import ( func BenchmarkJetStreamConsume(b *testing.B) { const ( - verbose = false - streamName = "S" - subject = "s" - seed = 12345 - publishTimeout = 30 * time.Second + verbose = false + streamName = "S" + subject = "s" + seed = 12345 + publishTimeout = 30 * time.Second + PublishBatchSize = 10000 ) runSyncPushConsumer := func(b *testing.B, js nats.JetStreamContext, streamName, subject string) (int, int, int) { @@ -347,24 +348,25 @@ func BenchmarkJetStreamConsume(b *testing.B) { rng := rand.New(rand.NewSource(int64(seed))) message := make([]byte, bc.messageSize) - publishedCount := 0 - for publishedCount < b.N { + + // Publish b.N messages to the stream (in batches) + for i := 1; i <= b.N; i++ { rng.Read(message) _, err := js.PublishAsync(subject, message) if err != nil { - continue - } else { - publishedCount++ + b.Fatalf("Failed to publish: %s", err) } - } - - select { - case <-js.PublishAsyncComplete(): - if verbose { - b.Logf("Published %d messages", b.N) + // Limit outstanding published messages to PublishBatchSize + if i%PublishBatchSize == 0 || i == b.N { + select { + case <-js.PublishAsyncComplete(): + if verbose { + b.Logf("Published %d/%d messages", i, b.N) + } + case <-time.After(publishTimeout): + b.Fatalf("Publish timed out") + } } - case <-time.After(publishTimeout): - b.Fatalf("Publish timed out") } // Discard time spent during setup From e1080966019dfa0edb0227c9b162e3ded59fd947 Mon Sep 17 00:00:00 2001 From: Marco Primi Date: Tue, 26 Sep 2023 10:20:02 -0700 Subject: [PATCH 2/5] Improve JS asynchronous publish benchmark Simplify logic and make sure no more than `asyncWindow` messages are ever in-flight --- server/jetstream_benchmark_test.go | 86 ++++++++++++++---------------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/server/jetstream_benchmark_test.go b/server/jetstream_benchmark_test.go index ec1bd76f..9fee6011 100644 --- a/server/jetstream_benchmark_test.go +++ b/server/jetstream_benchmark_test.go @@ -456,61 +456,55 @@ func BenchmarkJetStreamPublish(b *testing.B) { const publishCompleteMaxWait = 30 * time.Second rng := rand.New(rand.NewSource(int64(seed))) message := make([]byte, messageSize) - pending := make([]nats.PubAckFuture, 0, asyncWindow) + published, errors := 0, 0 + b.SetBytes(int64(messageSize)) b.ResetTimer() - for i := 1; i <= b.N; i++ { - rng.Read(message) // TODO may skip this? - subject := subjects[rng.Intn(len(subjects))] - pubAckFuture, err := js.PublishAsync(subject, message) - if err != nil { - errors++ - continue - } - pending = append(pending, pubAckFuture) + for published < b.N { - // Regularly trim the list of pending - if i%asyncWindow == 0 { - newPending := make([]nats.PubAckFuture, 0, asyncWindow) - for _, pubAckFuture := range pending { - select { - case <-pubAckFuture.Ok(): - published++ - b.SetBytes(int64(messageSize)) - case <-pubAckFuture.Err(): - errors++ - default: - // This pubAck is still pending, keep it - newPending = append(newPending, pubAckFuture) - } + // Normally publish a full batch (of size `asyncWindow`) + publishBatchSize := asyncWindow + // Unless fewer are left to complete the benchmark + if b.N-published < asyncWindow { + publishBatchSize = b.N - published + } + + pending := make([]nats.PubAckFuture, 0, publishBatchSize) + + for i := 0; i < publishBatchSize; i++ { + rng.Read(message) // TODO may skip this? + subject := subjects[rng.Intn(len(subjects))] + pubAckFuture, err := js.PublishAsync(subject, message) + if err != nil { + errors++ + continue } - pending = newPending + pending = append(pending, pubAckFuture) } - if verbose && i%1000 == 0 { - b.Logf("Published %d/%d, %d errors", i, b.N, errors) - } - } - - // All published, wait for completed - select { - case <-js.PublishAsyncComplete(): - case <-time.After(publishCompleteMaxWait): - b.Fatalf("Publish timed out") - } - - // Clear whatever is left pending - for _, pubAckFuture := range pending { + // All in this batch published, wait for completed select { - case <-pubAckFuture.Ok(): - published++ - b.SetBytes(int64(messageSize)) - case <-pubAckFuture.Err(): - errors++ - default: - b.Fatalf("PubAck is still pending after publish completed") + case <-js.PublishAsyncComplete(): + case <-time.After(publishCompleteMaxWait): + b.Fatalf("Publish timed out") + } + + // Verify one by one if they were published successfully + for _, pubAckFuture := range pending { + select { + case <-pubAckFuture.Ok(): + published++ + case <-pubAckFuture.Err(): + errors++ + default: + b.Fatalf("PubAck is still pending after publish completed") + } + } + + if verbose { + b.Logf("Published %d/%d", published, b.N) } } From c5698a9435b688e76dc4dd1301f23ab059cc624b Mon Sep 17 00:00:00 2001 From: Marco Primi Date: Tue, 26 Sep 2023 10:44:11 -0700 Subject: [PATCH 3/5] Cleanup unnecessary calls to setBytes in JS benchmarks --- server/jetstream_benchmark_test.go | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/server/jetstream_benchmark_test.go b/server/jetstream_benchmark_test.go index 9fee6011..75d73d58 100644 --- a/server/jetstream_benchmark_test.go +++ b/server/jetstream_benchmark_test.go @@ -82,7 +82,6 @@ func BenchmarkJetStreamConsume(b *testing.B) { uniqueConsumed++ bitset.set(index, true) - b.SetBytes(int64(len(msg.Data))) if verbose && uniqueConsumed%1000 == 0 { b.Logf("Consumed: %d/%d", bitset.count(), b.N) @@ -128,7 +127,6 @@ func BenchmarkJetStreamConsume(b *testing.B) { uniqueConsumed++ bitset.set(index, true) - b.SetBytes(int64(len(msg.Data))) if uniqueConsumed == b.N { msg.Sub.Unsubscribe() @@ -224,7 +222,6 @@ func BenchmarkJetStreamConsume(b *testing.B) { uniqueConsumed++ bitset.set(index, true) - b.SetBytes(int64(len(msg.Data))) if uniqueConsumed == b.N { msg.Sub.Unsubscribe() @@ -369,6 +366,9 @@ func BenchmarkJetStreamConsume(b *testing.B) { } } + // Set size of each operation, for throughput calculation + b.SetBytes(int64(bc.messageSize)) + // Discard time spent during setup // Consumer may reset again further in b.ResetTimer() @@ -439,7 +439,6 @@ func BenchmarkJetStreamPublish(b *testing.B) { errors++ } else { published++ - b.SetBytes(int64(messageSize)) } if verbose && i%1000 == 0 { @@ -459,7 +458,6 @@ func BenchmarkJetStreamPublish(b *testing.B) { published, errors := 0, 0 - b.SetBytes(int64(messageSize)) b.ResetTimer() for published < b.N { @@ -651,6 +649,8 @@ func BenchmarkJetStreamPublish(b *testing.B) { b.Logf("Running %v publisher with message size: %dB", pc.pType, bc.messageSize) } + b.SetBytes(int64(bc.messageSize)) + // Benchmark starts here b.ResetTimer() @@ -873,7 +873,7 @@ func BenchmarkJetStreamInterestStreamWithLimit(b *testing.B) { b.StopTimer() b.ResetTimer() - // Set per-iteration bytes to calculate throughput (a.k.a. speed) + // Set size of each operation, for throughput calculation b.SetBytes(messageSize) // Print benchmark parameters @@ -931,8 +931,11 @@ func BenchmarkJetStreamInterestStreamWithLimit(b *testing.B) { // Wait for all publishers to be ready pubCtx.readyWg.Wait() + // Set size of each operation, for throughput calculation + b.SetBytes(messageSize) + // Benchmark starts here - b.StartTimer() + b.ResetTimer() // Unblock the publishers pubCtx.lock.Unlock() @@ -978,14 +981,12 @@ func BenchmarkJetStreamKV(b *testing.B) { for i := 1; i <= b.N; i++ { key := keys[rng.Intn(len(keys))] - kve, err := kv.Get(key) + _, err := kv.Get(key) if err != nil { errors++ continue } - b.SetBytes(int64(len(kve.Value()))) - if verbose && i%1000 == 0 { b.Logf("Completed %d/%d Get ops", i, b.N) } @@ -1011,8 +1012,6 @@ func BenchmarkJetStreamKV(b *testing.B) { continue } - b.SetBytes(int64(valueSize)) - if verbose && i%1000 == 0 { b.Logf("Completed %d/%d Put ops", i, b.N) } @@ -1045,8 +1044,6 @@ func BenchmarkJetStreamKV(b *testing.B) { continue } - b.SetBytes(int64(valueSize)) - if verbose && i%1000 == 0 { b.Logf("Completed %d/%d Update ops", i, b.N) } @@ -1175,6 +1172,9 @@ func BenchmarkJetStreamKV(b *testing.B) { b.Fatalf("Error binding to KV: %v", err) } + // Set size of each operation, for throughput calculation + b.SetBytes(int64(bc.valueSize)) + // Discard time spent during setup // May reset again further in b.ResetTimer() From be106d1ee5cc04588ef2e7636c01436d8f21ecbc Mon Sep 17 00:00:00 2001 From: Marco Primi Date: Tue, 26 Sep 2023 11:59:16 -0700 Subject: [PATCH 4/5] Remove artificial limit on minimum number of operations --- server/jetstream_benchmark_test.go | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/server/jetstream_benchmark_test.go b/server/jetstream_benchmark_test.go index 75d73d58..da42b607 100644 --- a/server/jetstream_benchmark_test.go +++ b/server/jetstream_benchmark_test.go @@ -563,11 +563,6 @@ func BenchmarkJetStreamPublish(b *testing.B) { b.Run( name, func(b *testing.B) { - // Skip short runs, benchmark gets re-executed with a larger N - if b.N < bc.minMessages { - b.ResetTimer() - return - } subjects := make([]string, bc.numSubjects) for i := 0; i < bc.numSubjects; i++ { @@ -869,12 +864,6 @@ func BenchmarkJetStreamInterestStreamWithLimit(b *testing.B) { b.Run( limitDescription, func(b *testing.B) { - // Stop timer during setup - b.StopTimer() - b.ResetTimer() - - // Set size of each operation, for throughput calculation - b.SetBytes(messageSize) // Print benchmark parameters if verbose { @@ -970,7 +959,6 @@ func BenchmarkJetStreamKV(b *testing.B) { kvName = "BUCKET" keyPrefix = "K_" seed = 12345 - minOps = 1_000 ) runKVGet := func(b *testing.B, kv nats.KeyValue, keys []string) int { @@ -1097,11 +1085,6 @@ func BenchmarkJetStreamKV(b *testing.B) { b.Run( wName, func(b *testing.B) { - // Skip short runs, benchmark gets re-executed with a larger N - if b.N < minOps { - b.ResetTimer() - return - } if verbose { b.Logf("Running %s workload %s with %d messages", wName, bName, b.N) From d31236cea2570573905c1d8fc2cab35c3727c77a Mon Sep 17 00:00:00 2001 From: Marco Primi Date: Tue, 26 Sep 2023 14:54:06 -0700 Subject: [PATCH 5/5] Refactor cluster creation for JS benchmarks --- server/jetstream_benchmark_test.go | 207 +++++++++++------------------ 1 file changed, 74 insertions(+), 133 deletions(-) diff --git a/server/jetstream_benchmark_test.go b/server/jetstream_benchmark_test.go index da42b607..5d6ec571 100644 --- a/server/jetstream_benchmark_test.go +++ b/server/jetstream_benchmark_test.go @@ -305,23 +305,9 @@ func BenchmarkJetStreamConsume(b *testing.B) { if verbose { b.Logf("Setting up %d nodes", bc.clusterSize) } - var ( - connectURL string - cl *cluster - ) - if bc.clusterSize == 1 { - s := RunBasicJetStreamServer(b) - defer s.Shutdown() - connectURL = s.ClientURL() - } else { - cl = createJetStreamClusterExplicit(b, "BENCH_PUB", bc.clusterSize) - defer cl.shutdown() - cl.waitOnClusterReadyWithNumPeers(bc.clusterSize) - cl.waitOnLeader() - connectURL = cl.leader().ClientURL() - } - nc, js := jsClientConnectURL(b, connectURL) + cl, _, shutdown, nc, js := startJSClusterAndConnect(b, bc.clusterSize) + defer shutdown() defer nc.Close() if verbose { @@ -336,9 +322,9 @@ func BenchmarkJetStreamConsume(b *testing.B) { b.Fatalf("Error creating stream: %v", err) } - // cluster_size > 1, connect to stream leader - if cl != nil { - connectURL = cl.streamLeader("$G", streamName).ClientURL() + // If replicated resource, connect to stream leader for lower variability + if bc.replicas > 1 { + connectURL := cl.streamLeader("$G", streamName).ClientURL() nc.Close() _, js = jsClientConnectURL(b, connectURL) } @@ -576,27 +562,9 @@ func BenchmarkJetStreamPublish(b *testing.B) { if verbose { b.Logf("Setting up %d nodes", bc.clusterSize) } - var ( - connectURL string - cl *cluster - ) - if bc.clusterSize == 1 { - s := RunBasicJetStreamServer(b) - defer s.Shutdown() - connectURL = s.ClientURL() - } else { - cl = createJetStreamClusterExplicit(b, "BENCH_PUB", bc.clusterSize) - defer cl.shutdown() - cl.waitOnClusterReadyWithNumPeers(bc.clusterSize) - cl.waitOnLeader() - connectURL = cl.leader().ClientURL() - } - - nc, err := nats.Connect(connectURL) - if err != nil { - b.Fatalf("Failed to create client: %v", err) - } + cl, _, shutdown, nc, _ := startJSClusterAndConnect(b, bc.clusterSize) + defer shutdown() defer nc.Close() jsOpts := []nats.JSOpt{ @@ -624,9 +592,9 @@ func BenchmarkJetStreamPublish(b *testing.B) { b.Fatalf("Error creating stream: %v", err) } - // cluster_size > 1, connect to stream leader - if cl != nil { - connectURL = cl.streamLeader("$G", streamName).ClientURL() + // If replicated resource, connect to stream leader for lower variability + if bc.replicas > 1 { + connectURL := cl.streamLeader("$G", streamName).ClientURL() nc.Close() nc, err = nats.Connect(connectURL) if err != nil { @@ -637,7 +605,6 @@ func BenchmarkJetStreamPublish(b *testing.B) { if err != nil { b.Fatalf("Unexpected error getting JetStream context for stream leader: %v", err) } - } if verbose { @@ -738,47 +705,6 @@ func BenchmarkJetStreamInterestStreamWithLimit(b *testing.B) { }, } - // Helper: Stand up in-process single node or cluster - setupCluster := func(b *testing.B, clusterSize int) (string, func()) { - var connectURL string - var shutdownFunc func() - - if clusterSize == 1 { - s := RunBasicJetStreamServer(b) - shutdownFunc = s.Shutdown - connectURL = s.ClientURL() - } else { - cl := createJetStreamClusterExplicit(b, "BENCH_PUB", clusterSize) - shutdownFunc = cl.shutdown - cl.waitOnClusterReadyWithNumPeers(clusterSize) - cl.waitOnLeader() - connectURL = cl.leader().ClientURL() - } - - return connectURL, shutdownFunc - } - - // Helper: Create the stream - setupStream := func(b *testing.B, connectURL string, streamConfig *nats.StreamConfig) { - // Connect - nc, err := nats.Connect(connectURL) - if err != nil { - b.Fatalf("Failed to create client: %v", err) - } - defer nc.Close() - - jsOpts := []nats.JSOpt{} - - js, err := nc.JetStream(jsOpts...) - if err != nil { - b.Fatalf("Unexpected error getting JetStream context: %v", err) - } - - if _, err := js.AddStream(streamConfig); err != nil { - b.Fatalf("Error creating stream: %v", err) - } - } - // Context shared by publishers routines type PublishersContext = struct { readyWg sync.WaitGroup @@ -877,8 +803,9 @@ func BenchmarkJetStreamInterestStreamWithLimit(b *testing.B) { } // Setup server or cluster - connectURL, shutdownFunc := setupCluster(b, benchmarkCase.clusterSize) - defer shutdownFunc() + cl, ls, shutdown, nc, js := startJSClusterAndConnect(b, benchmarkCase.clusterSize) + defer shutdown() + defer nc.Close() // Common stream configuration streamConfig := &nats.StreamConfig{ @@ -891,8 +818,11 @@ func BenchmarkJetStreamInterestStreamWithLimit(b *testing.B) { } // Configure stream limit limitConfigFunc(streamConfig) + // Create stream - setupStream(b, connectURL, streamConfig) + if _, err := js.AddStream(streamConfig); err != nil { + b.Fatalf("Error creating stream: %v", err) + } // Set up publishers shared context var pubCtx PublishersContext @@ -903,6 +833,12 @@ func BenchmarkJetStreamInterestStreamWithLimit(b *testing.B) { pubCtx.lock.Lock() pubCtx.messagesLeft = b.N + connectURL := ls.ClientURL() + // If replicated resource, connect to stream leader for lower variability + if benchmarkCase.replicas > 1 { + connectURL = cl.streamLeader("$G", "S").ClientURL() + } + // Spawn publishers routines, each with its own connection and JS context for i := 0; i < numPublishers; i++ { nc, err := nats.Connect(connectURL) @@ -1093,24 +1029,6 @@ func BenchmarkJetStreamKV(b *testing.B) { if verbose { b.Logf("Setting up %d nodes", bc.clusterSize) } - var ( - connectURL string - cl *cluster - ) - if bc.clusterSize == 1 { - s := RunBasicJetStreamServer(b) - defer s.Shutdown() - connectURL = s.ClientURL() - } else { - cl = createJetStreamClusterExplicit(b, "BENCH_KV", bc.clusterSize) - defer cl.shutdown() - cl.waitOnClusterReadyWithNumPeers(bc.clusterSize) - cl.waitOnLeader() - connectURL = cl.leader().ClientURL() - } - - nc, js := jsClientConnectURL(b, connectURL) - defer nc.Close() // Pre-generate all keys keys := make([]string, 0, bc.numKeys) @@ -1119,6 +1037,11 @@ func BenchmarkJetStreamKV(b *testing.B) { keys = append(keys, key) } + // Setup server or cluster + cl, _, shutdown, nc, js := startJSClusterAndConnect(b, bc.clusterSize) + defer shutdown() + defer nc.Close() + // Create bucket if verbose { b.Logf("Creating KV %s with R=%d", kvName, bc.replicas) @@ -1143,13 +1066,14 @@ func BenchmarkJetStreamKV(b *testing.B) { } } - // if cluster_size > 1, connect to stream leader of bucket - if cl != nil { + // If replicated resource, connect to stream leader for lower variability + if bc.replicas > 1 { nc.Close() - - connectURL = cl.streamLeader("$G", fmt.Sprintf("KV_%s", kvName)).ClientURL() - _, js = jsClientConnectURL(b, connectURL) + connectURL := cl.streamLeader("$G", fmt.Sprintf("KV_%s", kvName)).ClientURL() + nc, js = jsClientConnectURL(b, connectURL) + defer nc.Close() } + kv, err = js.KeyValue(kv.Bucket()) if err != nil { b.Fatalf("Error binding to KV: %v", err) @@ -1264,7 +1188,6 @@ func BenchmarkJetStreamObjStore(b *testing.B) { minObjSz int maxObjSz int }{ - // TODO remove duplicates and fix comments {nats.MemoryStorage, 100, 1024, 102400}, // mem storage, 100 objects sized (1KB-100KB) {nats.MemoryStorage, 100, 102400, 1048576}, // mem storage, 100 objects sized (100KB-1MB) {nats.MemoryStorage, 1000, 10240, 102400}, // mem storage, 1k objects of various size (10KB - 100KB) @@ -1273,7 +1196,6 @@ func BenchmarkJetStreamObjStore(b *testing.B) { {nats.FileStorage, 100, 102400, 1048576}, // file storage, 100 objects sized (100KB-1MB) {nats.FileStorage, 100, 1048576, 10485760}, // file storage, 100 objects sized (1MB-10MB) {nats.FileStorage, 10, 10485760, 104857600}, // file storage, 10 objects sized (10MB-100MB) - } var ( @@ -1306,23 +1228,10 @@ func BenchmarkJetStreamObjStore(b *testing.B) { if verbose { b.Logf("Setting up %d nodes", replicas) } - var ( - connectURL string - cl *cluster - ) - if clusterSize == 1 { - s := RunBasicJetStreamServer(b) - defer s.Shutdown() - connectURL = s.ClientURL() - } else { - cl = createJetStreamClusterExplicit(b, "BENCH_OBJ_STORE", clusterSize) - defer cl.shutdown() - cl.waitOnClusterReadyWithNumPeers(replicas) - cl.waitOnLeader() - // connect to leader and not replicas - connectURL = cl.leader().ClientURL() - } - nc, js := jsClientConnectURL(b, connectURL) + + // Setup server or cluster + cl, _, shutdown, nc, js := startJSClusterAndConnect(b, clusterSize) + defer shutdown() defer nc.Close() // Initialize object store @@ -1339,10 +1248,10 @@ func BenchmarkJetStreamObjStore(b *testing.B) { b.Fatalf("Error creating ObjectStore: %v", err) } - // if cluster_size > 1, connect to stream leader - if cl != nil { + // If replicated resource, connect to stream leader for lower variability + if clusterSize > 1 { nc.Close() - connectURL = cl.streamLeader("$G", fmt.Sprintf("OBJ_%s", objStoreName)).ClientURL() + connectURL := cl.streamLeader("$G", fmt.Sprintf("OBJ_%s", objStoreName)).ClientURL() nc, js := jsClientConnectURL(b, connectURL) defer nc.Close() objStore, err = js.ObjectStore(objStoreName) @@ -1380,9 +1289,41 @@ func BenchmarkJetStreamObjStore(b *testing.B) { } }, ) - } }, ) } } + +// Helper function to stand up a JS-enabled single server or cluster +func startJSClusterAndConnect(b *testing.B, clusterSize int) (c *cluster, s *Server, shutdown func(), nc *nats.Conn, js nats.JetStreamContext) { + b.Helper() + var err error + + if clusterSize == 1 { + s = RunBasicJetStreamServer(b) + shutdown = func() { + s.Shutdown() + } + } else { + c = createJetStreamClusterExplicit(b, "BENCH_PUB", clusterSize) + c.waitOnClusterReadyWithNumPeers(clusterSize) + c.waitOnLeader() + s = c.leader() + shutdown = func() { + c.shutdown() + } + } + + nc, err = nats.Connect(s.ClientURL()) + if err != nil { + b.Fatalf("failed to connect: %s", err) + } + + js, err = nc.JetStream() + if err != nil { + b.Fatalf("failed to init jetstream: %s", err) + } + + return c, s, shutdown, nc, js +}