From 47b08335a46d947f71641012e1fa355793bdf0e9 Mon Sep 17 00:00:00 2001 From: Ivan Kozlovic Date: Tue, 28 Jan 2020 13:16:38 -0700 Subject: [PATCH] [FIXED] Reset of tlsName only for x509.HostnameError For issue #1256, we cleared the possibly saved tlsName on Hanshake failure. However, this meant that for normal use cases, if a reconnect failed for any reason we would not be able to reconnect if it is an IP until we get back to the URL that contained the hostname. We now clear only if the handshake error is of x509.HostnameError type, which include errors such as: ``` "x509: Common Name is not a valid hostname: " "x509: cannot validate certificate for because it doesn't contain any IP SANs" "x509: certificate is not valid for any names, but wanted to match " "x509: certificate is valid for , not " ``` Applied the same logic to solicited gateway connections, and fixed the fact that the tlsConfig should be cloned (since we set the ServerName). I have also made a change for leafnode connections similar to what we are doing for gateway connections, which is to use the saved tlsName only if tlsConfig.ServerName is empty, which may not be the case for users that embed NATS Server and pass directly tls configuration. In other words, if the option TLSConfig.ServerName is not empty, always use this value. Relates to #1256 Signed-off-by: Ivan Kozlovic --- server/gateway.go | 17 +++++++- server/leafnode.go | 55 ++++++++++++++------------ test/gateway_test.go | 91 +++++++++++++++++++++++++++++++++++++++++++ test/leafnode_test.go | 2 - 4 files changed, 136 insertions(+), 29 deletions(-) diff --git a/server/gateway.go b/server/gateway.go index d045c804..3b9bb064 100644 --- a/server/gateway.go +++ b/server/gateway.go @@ -17,6 +17,7 @@ import ( "bytes" "crypto/sha256" "crypto/tls" + "crypto/x509" "encoding/json" "fmt" "math/rand" @@ -744,20 +745,21 @@ func (s *Server) createGateway(cfg *gatewayCfg, url *url.URL, conn net.Conn) { // Check for TLS if tlsRequired { + var host string var timeout float64 // If we solicited, we will act like the client, otherwise the server. if solicit { c.Debugf("Starting TLS gateway client handshake") cfg.RLock() tlsName := cfg.tlsName - tlsConfig := cfg.TLSConfig + tlsConfig := cfg.TLSConfig.Clone() timeout = cfg.TLSTimeout cfg.RUnlock() if tlsConfig.ServerName == "" { // If the given url is a hostname, use this hostname for the // ServerName. If it is an IP, use the cfg's tlsName. If none // is available, resort to current IP. - host := url.Hostname() + host = url.Hostname() if tlsName != "" && net.ParseIP(host) != nil { host = tlsName } @@ -779,6 +781,17 @@ func (s *Server) createGateway(cfg *gatewayCfg, url *url.URL, conn net.Conn) { c.mu.Unlock() if err := conn.Handshake(); err != nil { + if solicit { + // Based on type of error, possibly clear the saved tlsName + // See: https://github.com/nats-io/nats-server/issues/1256 + if _, ok := err.(x509.HostnameError); ok { + cfg.Lock() + if host == cfg.tlsName { + cfg.tlsName = "" + } + cfg.Unlock() + } + } c.Errorf("TLS gateway handshake error: %v", err) c.sendErr("Secure Connection - TLS Required") c.closeConnection(TLSHandshakeError) diff --git a/server/leafnode.go b/server/leafnode.go index 69176563..ff021fe3 100644 --- a/server/leafnode.go +++ b/server/leafnode.go @@ -17,6 +17,7 @@ import ( "bufio" "bytes" "crypto/tls" + "crypto/x509" "encoding/base64" "encoding/json" "fmt" @@ -638,30 +639,30 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client { } // Do TLS here as needed. - tlsRequired := c.leaf.remote.TLS || c.leaf.remote.TLSConfig != nil + tlsRequired := remote.TLS || remote.TLSConfig != nil if tlsRequired { c.Debugf("Starting TLS leafnode client handshake") // Specify the ServerName we are expecting. var tlsConfig *tls.Config - if c.leaf.remote.TLSConfig != nil { - tlsConfig = c.leaf.remote.TLSConfig.Clone() + if remote.TLSConfig != nil { + tlsConfig = remote.TLSConfig.Clone() } else { tlsConfig = &tls.Config{MinVersion: tls.VersionTLS12} } - url := c.leaf.remote.getCurrentURL() - host, _, _ := net.SplitHostPort(url.Host) - // We need to check if this host is an IP. If so, we probably - // had this advertised to us and should use the configured host - // name for the TLS server name. - if net.ParseIP(host) != nil { - if c.leaf.remote.tlsName != "" { - host = c.leaf.remote.tlsName - } else { - host, _, _ = net.SplitHostPort(c.leaf.remote.curURL.Host) + var host string + // If ServerName was given to us from the option, use that, always. + if tlsConfig.ServerName == "" { + url := remote.getCurrentURL() + host = url.Hostname() + // We need to check if this host is an IP. If so, we probably + // had this advertised to us and should use the configured host + // name for the TLS server name. + if remote.tlsName != "" && net.ParseIP(host) != nil { + host = remote.tlsName } + tlsConfig.ServerName = host } - tlsConfig.ServerName = host c.nc = tls.Client(c.nc, tlsConfig) @@ -669,10 +670,10 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client { // Setup the timeout var wait time.Duration - if c.leaf.remote.TLSTimeout == 0 { + if remote.TLSTimeout == 0 { wait = TLS_TIMEOUT } else { - wait = secondsToDuration(c.leaf.remote.TLSTimeout) + wait = secondsToDuration(remote.TLSTimeout) } time.AfterFunc(wait, func() { tlsTimeout(c, conn) }) conn.SetReadDeadline(time.Now().Add(wait)) @@ -680,17 +681,21 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client { // Force handshake c.mu.Unlock() if err = conn.Handshake(); err != nil { + if solicited { + // If we overrode and used the saved tlsName but that failed + // we will clear that here. This is for the case that another server + // does not have the same tlsName, maybe only IPs. + // https://github.com/nats-io/nats-server/issues/1256 + if _, ok := err.(x509.HostnameError); ok { + remote.Lock() + if host == remote.tlsName { + remote.tlsName = "" + } + remote.Unlock() + } + } c.Errorf("TLS handshake error: %v", err) c.closeConnection(TLSHandshakeError) - // If we overrode and used the saved tlsName but that failed - // we will clear that here. This is for the case that another server - // does not have the same tlsName, maybe only IPs. - // https://github.com/nats-io/nats-server/issues/1256 - c.mu.Lock() - if host == c.leaf.remote.tlsName { - c.leaf.remote.tlsName = "" - } - c.mu.Unlock() return nil } // Reset the read deadline diff --git a/test/gateway_test.go b/test/gateway_test.go index 69dc6773..340e6e19 100644 --- a/test/gateway_test.go +++ b/test/gateway_test.go @@ -16,8 +16,10 @@ package test import ( "bufio" "bytes" + "crypto/tls" "fmt" "net" + "net/url" "regexp" "testing" "time" @@ -571,3 +573,92 @@ func TestGatewaySystemConnectionAllowedToPublishOnGWPrefix(t *testing.T) { } } } + +func TestGatewayTLSMixedIPAndDNS(t *testing.T) { + server.SetGatewaysSolicitDelay(5 * time.Millisecond) + defer server.ResetGatewaysSolicitDelay() + + confA1 := createConfFile(t, []byte(` + listen: 127.0.0.1:-1 + gateway { + name: "A" + listen: "127.0.0.1:-1" + tls { + cert_file: "./configs/certs/server-iponly.pem" + key_file: "./configs/certs/server-key-iponly.pem" + ca_file: "./configs/certs/ca.pem" + timeout: 2 + } + } + cluster { + listen: "127.0.0.1:-1" + } + `)) + srvA1, optsA1 := RunServerWithConfig(confA1) + defer srvA1.Shutdown() + + confA2Template := ` + listen: 127.0.0.1:-1 + gateway { + name: "A" + listen: "localhost:-1" + tls { + cert_file: "./configs/certs/server-cert.pem" + key_file: "./configs/certs/server-key.pem" + ca_file: "./configs/certs/ca.pem" + timeout: 2 + } + } + cluster { + listen: "127.0.0.1:-1" + routes [ + "nats://%s:%d" + ] + } + ` + confA2 := createConfFile(t, []byte(fmt.Sprintf(confA2Template, + optsA1.Cluster.Host, optsA1.Cluster.Port))) + srvA2, optsA2 := RunServerWithConfig(confA2) + defer srvA2.Shutdown() + + checkClusterFormed(t, srvA1, srvA2) + + // Create a GW connection to cluster "A". Don't use the helper since we need verification etc. + o := DefaultTestOptions + o.Port = -1 + o.Gateway.Name = "B" + o.Gateway.Host = "127.0.0.1" + o.Gateway.Port = -1 + + tc := &server.TLSConfigOpts{} + tc.CertFile = "./configs/certs/server-cert.pem" + tc.KeyFile = "./configs/certs/server-key.pem" + tc.CaFile = "./configs/certs/ca.pem" + tc.Timeout = 2.0 + tlsConfig, err := server.GenTLSConfig(tc) + if err != nil { + t.Fatalf("Error generating TLS config: %v", err) + } + tlsConfig.ClientAuth = tls.RequireAndVerifyClientCert + tlsConfig.RootCAs = tlsConfig.ClientCAs + + o.Gateway.TLSConfig = tlsConfig.Clone() + + rurl, _ := url.Parse(fmt.Sprintf("nats://%s:%d", optsA2.Gateway.Host, optsA2.Gateway.Port)) + remote := &server.RemoteGatewayOpts{Name: "A", URLs: []*url.URL{rurl}} + remote.TLSConfig = tlsConfig.Clone() + o.Gateway.Gateways = []*server.RemoteGatewayOpts{remote} + + srvB := RunServer(&o) + defer srvB.Shutdown() + + waitForOutboundGateways(t, srvB, 1, 10*time.Second) + waitForOutboundGateways(t, srvA1, 1, 10*time.Second) + waitForOutboundGateways(t, srvA2, 1, 10*time.Second) + + // Now kill off srvA2 and force serverB to connect to srvA1. + srvA2.Shutdown() + + // Make sure this works. + waitForOutboundGateways(t, srvB, 1, 10*time.Second) +} diff --git a/test/leafnode_test.go b/test/leafnode_test.go index 7d138a3b..2111d76c 100644 --- a/test/leafnode_test.go +++ b/test/leafnode_test.go @@ -3082,8 +3082,6 @@ func TestClusterTLSMixedIPAndDNS(t *testing.T) { t.Fatalf("Failed to parse root certificate from %q", "./configs/certs/ca.pem") } remote.TLSConfig.RootCAs = pool - host, _, _ := net.SplitHostPort(optsB.LeafNode.Host) - remote.TLSConfig.ServerName = host o.LeafNode.Remotes = []*server.RemoteLeafOpts{remote} sl, _ := RunServer(&o), &o defer sl.Shutdown()