mirror of
https://github.com/gogrlx/nats-server.git
synced 2026-04-02 03:38:42 -07:00
Merge pull request #1475 from nats-io/fix_leafnode_solicit_failure_race
[FIXED] LeafNode solicit failure race could leave conn registered
This commit is contained in:
@@ -604,6 +604,7 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
|
||||
// Determines if we are soliciting the connection or not.
|
||||
var solicited bool
|
||||
var sendSysConnectEvent bool
|
||||
var acc *Account
|
||||
|
||||
c.mu.Lock()
|
||||
c.initClient()
|
||||
@@ -625,7 +626,8 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
|
||||
// TODO: Decide what should be the optimal behavior here.
|
||||
// For now, if lookup fails, we will constantly try
|
||||
// to recreate this LN connection.
|
||||
acc, err := s.LookupAccount(remote.LocalAccount)
|
||||
var err error
|
||||
acc, err = s.LookupAccount(remote.LocalAccount)
|
||||
if err != nil {
|
||||
c.Errorf("No local account %q for leafnode: %v", remote.LocalAccount, err)
|
||||
c.closeConnection(MissingAccount)
|
||||
@@ -726,18 +728,16 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
|
||||
// Force handshake
|
||||
c.mu.Unlock()
|
||||
if err = conn.Handshake(); err != nil {
|
||||
if solicited {
|
||||
// If we overrode and used the saved tlsName but that failed
|
||||
// we will clear that here. This is for the case that another server
|
||||
// does not have the same tlsName, maybe only IPs.
|
||||
// https://github.com/nats-io/nats-server/issues/1256
|
||||
if _, ok := err.(x509.HostnameError); ok {
|
||||
remote.Lock()
|
||||
if host == remote.tlsName {
|
||||
remote.tlsName = ""
|
||||
}
|
||||
remote.Unlock()
|
||||
// If we overrode and used the saved tlsName but that failed
|
||||
// we will clear that here. This is for the case that another server
|
||||
// does not have the same tlsName, maybe only IPs.
|
||||
// https://github.com/nats-io/nats-server/issues/1256
|
||||
if _, ok := err.(x509.HostnameError); ok {
|
||||
remote.Lock()
|
||||
if host == remote.tlsName {
|
||||
remote.tlsName = ""
|
||||
}
|
||||
remote.Unlock()
|
||||
}
|
||||
c.Errorf("TLS handshake error: %v", err)
|
||||
c.closeConnection(TLSHandshakeError)
|
||||
@@ -829,11 +829,28 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
|
||||
// Also send our local subs.
|
||||
if solicited {
|
||||
// Make sure we register with the account here.
|
||||
c.registerWithAccount(c.acc)
|
||||
c.registerWithAccount(acc)
|
||||
s.addLeafNodeConnection(c)
|
||||
s.initLeafNodeSmapAndSendSubs(c)
|
||||
if sendSysConnectEvent {
|
||||
s.sendLeafNodeConnect(c.acc)
|
||||
s.sendLeafNodeConnect(acc)
|
||||
}
|
||||
|
||||
// The above functions are not atomically under the client
|
||||
// lock doing those operations. It is possible - since we
|
||||
// have started the read/write loops - that the connection
|
||||
// is closed before or in between. This would leave the
|
||||
// closed LN connection possible registered with the account
|
||||
// and/or the server's leafs map. So check if connection
|
||||
// is closed, and if so, manually cleanup.
|
||||
c.mu.Lock()
|
||||
closed := c.isClosed()
|
||||
c.mu.Unlock()
|
||||
if closed {
|
||||
s.removeLeafNodeConnection(c)
|
||||
if prev := acc.removeClient(c); prev == 1 {
|
||||
s.decActiveAccounts()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1566,10 +1566,10 @@ func TestLeafNodeTLSVerifyAndMap(t *testing.T) {
|
||||
if !test.provideCert {
|
||||
// Wait a bit and make sure we are not connecting
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
checkLeafNodeConnectedCount(t, sl, 0)
|
||||
checkLeafNodeConnectedCount(t, s, 0)
|
||||
return
|
||||
}
|
||||
checkLeafNodeConnected(t, sl)
|
||||
checkLeafNodeConnected(t, s)
|
||||
|
||||
var uname string
|
||||
var accname string
|
||||
|
||||
@@ -2158,10 +2158,15 @@ func TestLeafNodeConnectionLimitsSingleServer(t *testing.T) {
|
||||
|
||||
checkAccConnectionCounts(t, 2)
|
||||
|
||||
// Make sure s4 has 0 still.
|
||||
if nln := s4.NumLeafNodes(); nln != 0 {
|
||||
t.Fatalf("Expected no leafnodes accounted for in s4, got %d", nln)
|
||||
}
|
||||
// Make sure s4 has 0 still. We need checkFor because it is possible
|
||||
// that when we check we have actually the connection registered for
|
||||
// a short period before it is closed due to limit.
|
||||
checkFor(t, time.Second, 15*time.Millisecond, func() error {
|
||||
if nln := s4.NumLeafNodes(); nln != 0 {
|
||||
return fmt.Errorf("Expected no leafnodes accounted for in s4, got %d", nln)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
// Make sure this is still 2.
|
||||
checkLeafNodeConnections(t, s, 2)
|
||||
|
||||
Reference in New Issue
Block a user