mirror of
https://github.com/gogrlx/nats-server.git
synced 2026-04-17 03:24:40 -07:00
[FIXED] LeafNode solicit failure race could leave conn registered
This was found due to a recent test that was flapping. The test was not checking the correct server for leafnode connection, but that uncovered the following bug: When a leafnode connection is solicited, the read/write loops are started. Then, the connection lock is released and several functions invoked to register the connection with an account and add to the connection leafs map. The problem is that the readloop (for instance) could get a read error and close the connection *before* the above said code executes, which would lead to a connection incorrectly registered. This could be fixed either by delaying the start of read/write loops after the registration is done, or like in this PR, check the connection close status after registration, and if closed, manually undoing the registration with account/leafs map. Signed-off-by: Ivan Kozlovic <ivan@synadia.com>
This commit is contained in:
@@ -603,6 +603,7 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
|
||||
// Determines if we are soliciting the connection or not.
|
||||
var solicited bool
|
||||
var sendSysConnectEvent bool
|
||||
var acc *Account
|
||||
|
||||
c.mu.Lock()
|
||||
c.initClient()
|
||||
@@ -624,7 +625,8 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
|
||||
// TODO: Decide what should be the optimal behavior here.
|
||||
// For now, if lookup fails, we will constantly try
|
||||
// to recreate this LN connection.
|
||||
acc, err := s.LookupAccount(remote.LocalAccount)
|
||||
var err error
|
||||
acc, err = s.LookupAccount(remote.LocalAccount)
|
||||
if err != nil {
|
||||
c.Errorf("No local account %q for leafnode: %v", remote.LocalAccount, err)
|
||||
c.closeConnection(MissingAccount)
|
||||
@@ -725,18 +727,16 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
|
||||
// Force handshake
|
||||
c.mu.Unlock()
|
||||
if err = conn.Handshake(); err != nil {
|
||||
if solicited {
|
||||
// If we overrode and used the saved tlsName but that failed
|
||||
// we will clear that here. This is for the case that another server
|
||||
// does not have the same tlsName, maybe only IPs.
|
||||
// https://github.com/nats-io/nats-server/issues/1256
|
||||
if _, ok := err.(x509.HostnameError); ok {
|
||||
remote.Lock()
|
||||
if host == remote.tlsName {
|
||||
remote.tlsName = ""
|
||||
}
|
||||
remote.Unlock()
|
||||
// If we overrode and used the saved tlsName but that failed
|
||||
// we will clear that here. This is for the case that another server
|
||||
// does not have the same tlsName, maybe only IPs.
|
||||
// https://github.com/nats-io/nats-server/issues/1256
|
||||
if _, ok := err.(x509.HostnameError); ok {
|
||||
remote.Lock()
|
||||
if host == remote.tlsName {
|
||||
remote.tlsName = ""
|
||||
}
|
||||
remote.Unlock()
|
||||
}
|
||||
c.Errorf("TLS handshake error: %v", err)
|
||||
c.closeConnection(TLSHandshakeError)
|
||||
@@ -828,11 +828,28 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
|
||||
// Also send our local subs.
|
||||
if solicited {
|
||||
// Make sure we register with the account here.
|
||||
c.registerWithAccount(c.acc)
|
||||
c.registerWithAccount(acc)
|
||||
s.addLeafNodeConnection(c)
|
||||
s.initLeafNodeSmapAndSendSubs(c)
|
||||
if sendSysConnectEvent {
|
||||
s.sendLeafNodeConnect(c.acc)
|
||||
s.sendLeafNodeConnect(acc)
|
||||
}
|
||||
|
||||
// The above functions are not atomically under the client
|
||||
// lock doing those operations. It is possible - since we
|
||||
// have started the read/write loops - that the connection
|
||||
// is closed before or in between. This would leave the
|
||||
// closed LN connection possible registered with the account
|
||||
// and/or the server's leafs map. So check if connection
|
||||
// is closed, and if so, manually cleanup.
|
||||
c.mu.Lock()
|
||||
closed := c.isClosed()
|
||||
c.mu.Unlock()
|
||||
if closed {
|
||||
s.removeLeafNodeConnection(c)
|
||||
if prev := acc.removeClient(c); prev == 1 {
|
||||
s.decActiveAccounts()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user