From 0873b46f67b089ed04ae15679add9da20af23a2d Mon Sep 17 00:00:00 2001 From: Ivan Kozlovic Date: Fri, 12 Jul 2019 18:46:11 -0600 Subject: [PATCH] [FIXED] LeafNode urls may be missing in INFO sent to LN connections When a cluster of servers are having routes to each other, there is a chance that the list of leafnode URLs maintained on each server is not complete. This would result in LN servers connecting to this cluster to not get the full list of possible URLs the server could reconnect to. Also fixed a DATA RACE that appeared when running the updated TestLeafNodeInfoURLs test. Fixed the race and added specific test that easily demonstrated the race: TestLeafNodeNoRaceGeneratingNonce Signed-off-by: Ivan Kozlovic --- server/leafnode.go | 7 +++- server/route.go | 5 +++ test/leafnode_test.go | 97 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) diff --git a/server/leafnode.go b/server/leafnode.go index 1064dff2..e032852b 100644 --- a/server/leafnode.go +++ b/server/leafnode.go @@ -507,9 +507,14 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client { c.leaf.remote = remote } + var nonce [nonceLen]byte + // Grab server variables s.mu.Lock() info := s.copyLeafNodeInfo() + if !solicited { + s.generateNonce(nonce[:]) + } s.mu.Unlock() // Grab lock @@ -606,7 +611,7 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client { // Send our info to the other side. // Remember the nonce we sent here for signatures, etc. c.nonce = make([]byte, nonceLen) - s.generateNonce(c.nonce) + copy(c.nonce, nonce[:]) info.Nonce = string(c.nonce) info.CID = c.cid b, _ := json.Marshal(info) diff --git a/server/route.go b/server/route.go index c9a3d058..512e5629 100644 --- a/server/route.go +++ b/server/route.go @@ -1280,6 +1280,11 @@ func (s *Server) addRoute(c *client, info *Info) (bool, bool) { rs := *c.route r = &rs } + // Since this duplicate route is going to be removed, make sure we clear + // c.route.leafnodeURL, otherwise, when processing the disconnect, this + // would cause the leafnode URL for that remote server to be removed + // from our list. + c.route.leafnodeURL = _EMPTY_ c.mu.Unlock() remote.mu.Lock() diff --git a/test/leafnode_test.go b/test/leafnode_test.go index 4685076f..e491e733 100644 --- a/test/leafnode_test.go +++ b/test/leafnode_test.go @@ -1584,6 +1584,59 @@ func TestLeafNodeInfoURLs(t *testing.T) { t.Fatalf("Expected URL to be %s, got %s", s1LNURL, url) } lc.Close() + + s1.Shutdown() + + // Now we need a configuration where both s1 and s2 have a route + // to each other, so we need explicit configuration. We are trying + // to get S1->S2 and S2->S1 so one of the route is dropped. This + // should not affect the number of URLs reported in INFO. + + opts.Cluster.Port = 5223 + opts.Routes = server.RoutesFromStr(fmt.Sprintf("nats://%s:5224", opts2.Host)) + s1, _ = server.NewServer(opts) + defer s1.Shutdown() + + opts2.Cluster.Port = 5224 + opts2.Routes = server.RoutesFromStr(fmt.Sprintf("nats://%s:5223", opts.Host)) + s2, _ = server.NewServer(opts2) + defer s2.Shutdown() + + // Start this way to increase chance of having the two connect + // to each other at the same time. This will cause one of the + // route to be dropped. + wg := &sync.WaitGroup{} + wg.Add(2) + go func() { + s1.Start() + wg.Done() + }() + go func() { + s2.Start() + wg.Done() + }() + + checkClusterFormed(t, s1, s2) + + lc = createLeafConn(t, opts.LeafNode.Host, opts.LeafNode.Port) + defer lc.Close() + info = checkInfoMsg(t, lc) + if sz := len(info.LeafNodeURLs); sz != 2 { + t.Fatalf("Expected LeafNodeURLs array to be size 2, got %v", sz) + } + ok[0], ok[1] = 0, 0 + for _, url := range info.LeafNodeURLs { + if url == s1LNURL { + ok[0]++ + } else if url == s2LNURL { + ok[1]++ + } + } + for i, res := range ok { + if res != 1 { + t.Fatalf("URL from server %v was found %v times", i+1, res) + } + } }) } } @@ -2701,3 +2754,47 @@ func TestLeafNodeCycleWithSolicited(t *testing.T) { verifyOneResponse(rsub) verifyRequestTotal(2) // This is total since use same responders. } + +func TestLeafNodeNoRaceGeneratingNonce(t *testing.T) { + opts := testDefaultOptionsForLeafNodes() + opts.Cluster.Port = -1 + s := RunServer(opts) + defer s.Shutdown() + + quitCh := make(chan struct{}) + wg := sync.WaitGroup{} + wg.Add(2) + + go func() { + defer wg.Done() + for { + lc := createLeafConn(t, opts.LeafNode.Host, opts.LeafNode.Port) + checkInfoMsg(t, lc) + lc.Close() + select { + case <-quitCh: + return + case <-time.After(time.Millisecond): + } + } + }() + + go func() { + defer wg.Done() + for { + rc := createRouteConn(t, opts.Cluster.Host, opts.Cluster.Port) + checkInfoMsg(t, rc) + rc.Close() + select { + case <-quitCh: + return + case <-time.After(time.Millisecond): + } + } + }() + + // Let this run for a bit to see if we get data race + time.Sleep(100 * time.Millisecond) + close(quitCh) + wg.Wait() +}