[FIXED] Reset of tlsName only for x509.HostnameError

For issue #1256, we cleared the possibly saved tlsName on Hanshake failure.
However, this meant that for normal use cases, if a reconnect failed for
any reason we would not be able to reconnect if it is an IP until we get
back to the URL that contained the hostname.

We now clear only if the handshake error is of x509.HostnameError type,
which include errors such as:
```
"x509: Common Name is not a valid hostname: <x>"
"x509: cannot validate certificate for <x> because it doesn't contain any IP SANs"
"x509: certificate is not valid for any names, but wanted to match <x>"
"x509: certificate is valid for <x>, not <y>"
```

Applied the same logic to solicited gateway connections, and fixed the fact
that the tlsConfig should be cloned (since we set the ServerName).

I have also made a change for leafnode connections similar to what we are
doing for gateway connections, which is to use the saved tlsName only if
tlsConfig.ServerName is empty, which may not be the case for users that
embed NATS Server and pass directly tls configuration. In other words,
if the option TLSConfig.ServerName is not empty, always use this value.

Relates to #1256

Signed-off-by: Ivan Kozlovic <ivan@synadia.com>
This commit is contained in:
Ivan Kozlovic
2020-01-28 13:16:38 -07:00
parent b1fc364a6c
commit 47b08335a4
4 changed files with 136 additions and 29 deletions

View File

@@ -17,6 +17,7 @@ import (
"bytes"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"encoding/json"
"fmt"
"math/rand"
@@ -744,20 +745,21 @@ func (s *Server) createGateway(cfg *gatewayCfg, url *url.URL, conn net.Conn) {
// Check for TLS
if tlsRequired {
var host string
var timeout float64
// If we solicited, we will act like the client, otherwise the server.
if solicit {
c.Debugf("Starting TLS gateway client handshake")
cfg.RLock()
tlsName := cfg.tlsName
tlsConfig := cfg.TLSConfig
tlsConfig := cfg.TLSConfig.Clone()
timeout = cfg.TLSTimeout
cfg.RUnlock()
if tlsConfig.ServerName == "" {
// If the given url is a hostname, use this hostname for the
// ServerName. If it is an IP, use the cfg's tlsName. If none
// is available, resort to current IP.
host := url.Hostname()
host = url.Hostname()
if tlsName != "" && net.ParseIP(host) != nil {
host = tlsName
}
@@ -779,6 +781,17 @@ func (s *Server) createGateway(cfg *gatewayCfg, url *url.URL, conn net.Conn) {
c.mu.Unlock()
if err := conn.Handshake(); err != nil {
if solicit {
// Based on type of error, possibly clear the saved tlsName
// See: https://github.com/nats-io/nats-server/issues/1256
if _, ok := err.(x509.HostnameError); ok {
cfg.Lock()
if host == cfg.tlsName {
cfg.tlsName = ""
}
cfg.Unlock()
}
}
c.Errorf("TLS gateway handshake error: %v", err)
c.sendErr("Secure Connection - TLS Required")
c.closeConnection(TLSHandshakeError)

View File

@@ -17,6 +17,7 @@ import (
"bufio"
"bytes"
"crypto/tls"
"crypto/x509"
"encoding/base64"
"encoding/json"
"fmt"
@@ -638,30 +639,30 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
}
// Do TLS here as needed.
tlsRequired := c.leaf.remote.TLS || c.leaf.remote.TLSConfig != nil
tlsRequired := remote.TLS || remote.TLSConfig != nil
if tlsRequired {
c.Debugf("Starting TLS leafnode client handshake")
// Specify the ServerName we are expecting.
var tlsConfig *tls.Config
if c.leaf.remote.TLSConfig != nil {
tlsConfig = c.leaf.remote.TLSConfig.Clone()
if remote.TLSConfig != nil {
tlsConfig = remote.TLSConfig.Clone()
} else {
tlsConfig = &tls.Config{MinVersion: tls.VersionTLS12}
}
url := c.leaf.remote.getCurrentURL()
host, _, _ := net.SplitHostPort(url.Host)
// We need to check if this host is an IP. If so, we probably
// had this advertised to us and should use the configured host
// name for the TLS server name.
if net.ParseIP(host) != nil {
if c.leaf.remote.tlsName != "" {
host = c.leaf.remote.tlsName
} else {
host, _, _ = net.SplitHostPort(c.leaf.remote.curURL.Host)
var host string
// If ServerName was given to us from the option, use that, always.
if tlsConfig.ServerName == "" {
url := remote.getCurrentURL()
host = url.Hostname()
// We need to check if this host is an IP. If so, we probably
// had this advertised to us and should use the configured host
// name for the TLS server name.
if remote.tlsName != "" && net.ParseIP(host) != nil {
host = remote.tlsName
}
tlsConfig.ServerName = host
}
tlsConfig.ServerName = host
c.nc = tls.Client(c.nc, tlsConfig)
@@ -669,10 +670,10 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
// Setup the timeout
var wait time.Duration
if c.leaf.remote.TLSTimeout == 0 {
if remote.TLSTimeout == 0 {
wait = TLS_TIMEOUT
} else {
wait = secondsToDuration(c.leaf.remote.TLSTimeout)
wait = secondsToDuration(remote.TLSTimeout)
}
time.AfterFunc(wait, func() { tlsTimeout(c, conn) })
conn.SetReadDeadline(time.Now().Add(wait))
@@ -680,17 +681,21 @@ func (s *Server) createLeafNode(conn net.Conn, remote *leafNodeCfg) *client {
// Force handshake
c.mu.Unlock()
if err = conn.Handshake(); err != nil {
if solicited {
// If we overrode and used the saved tlsName but that failed
// we will clear that here. This is for the case that another server
// does not have the same tlsName, maybe only IPs.
// https://github.com/nats-io/nats-server/issues/1256
if _, ok := err.(x509.HostnameError); ok {
remote.Lock()
if host == remote.tlsName {
remote.tlsName = ""
}
remote.Unlock()
}
}
c.Errorf("TLS handshake error: %v", err)
c.closeConnection(TLSHandshakeError)
// If we overrode and used the saved tlsName but that failed
// we will clear that here. This is for the case that another server
// does not have the same tlsName, maybe only IPs.
// https://github.com/nats-io/nats-server/issues/1256
c.mu.Lock()
if host == c.leaf.remote.tlsName {
c.leaf.remote.tlsName = ""
}
c.mu.Unlock()
return nil
}
// Reset the read deadline

View File

@@ -16,8 +16,10 @@ package test
import (
"bufio"
"bytes"
"crypto/tls"
"fmt"
"net"
"net/url"
"regexp"
"testing"
"time"
@@ -571,3 +573,92 @@ func TestGatewaySystemConnectionAllowedToPublishOnGWPrefix(t *testing.T) {
}
}
}
func TestGatewayTLSMixedIPAndDNS(t *testing.T) {
server.SetGatewaysSolicitDelay(5 * time.Millisecond)
defer server.ResetGatewaysSolicitDelay()
confA1 := createConfFile(t, []byte(`
listen: 127.0.0.1:-1
gateway {
name: "A"
listen: "127.0.0.1:-1"
tls {
cert_file: "./configs/certs/server-iponly.pem"
key_file: "./configs/certs/server-key-iponly.pem"
ca_file: "./configs/certs/ca.pem"
timeout: 2
}
}
cluster {
listen: "127.0.0.1:-1"
}
`))
srvA1, optsA1 := RunServerWithConfig(confA1)
defer srvA1.Shutdown()
confA2Template := `
listen: 127.0.0.1:-1
gateway {
name: "A"
listen: "localhost:-1"
tls {
cert_file: "./configs/certs/server-cert.pem"
key_file: "./configs/certs/server-key.pem"
ca_file: "./configs/certs/ca.pem"
timeout: 2
}
}
cluster {
listen: "127.0.0.1:-1"
routes [
"nats://%s:%d"
]
}
`
confA2 := createConfFile(t, []byte(fmt.Sprintf(confA2Template,
optsA1.Cluster.Host, optsA1.Cluster.Port)))
srvA2, optsA2 := RunServerWithConfig(confA2)
defer srvA2.Shutdown()
checkClusterFormed(t, srvA1, srvA2)
// Create a GW connection to cluster "A". Don't use the helper since we need verification etc.
o := DefaultTestOptions
o.Port = -1
o.Gateway.Name = "B"
o.Gateway.Host = "127.0.0.1"
o.Gateway.Port = -1
tc := &server.TLSConfigOpts{}
tc.CertFile = "./configs/certs/server-cert.pem"
tc.KeyFile = "./configs/certs/server-key.pem"
tc.CaFile = "./configs/certs/ca.pem"
tc.Timeout = 2.0
tlsConfig, err := server.GenTLSConfig(tc)
if err != nil {
t.Fatalf("Error generating TLS config: %v", err)
}
tlsConfig.ClientAuth = tls.RequireAndVerifyClientCert
tlsConfig.RootCAs = tlsConfig.ClientCAs
o.Gateway.TLSConfig = tlsConfig.Clone()
rurl, _ := url.Parse(fmt.Sprintf("nats://%s:%d", optsA2.Gateway.Host, optsA2.Gateway.Port))
remote := &server.RemoteGatewayOpts{Name: "A", URLs: []*url.URL{rurl}}
remote.TLSConfig = tlsConfig.Clone()
o.Gateway.Gateways = []*server.RemoteGatewayOpts{remote}
srvB := RunServer(&o)
defer srvB.Shutdown()
waitForOutboundGateways(t, srvB, 1, 10*time.Second)
waitForOutboundGateways(t, srvA1, 1, 10*time.Second)
waitForOutboundGateways(t, srvA2, 1, 10*time.Second)
// Now kill off srvA2 and force serverB to connect to srvA1.
srvA2.Shutdown()
// Make sure this works.
waitForOutboundGateways(t, srvB, 1, 10*time.Second)
}

View File

@@ -3082,8 +3082,6 @@ func TestClusterTLSMixedIPAndDNS(t *testing.T) {
t.Fatalf("Failed to parse root certificate from %q", "./configs/certs/ca.pem")
}
remote.TLSConfig.RootCAs = pool
host, _, _ := net.SplitHostPort(optsB.LeafNode.Host)
remote.TLSConfig.ServerName = host
o.LeafNode.Remotes = []*server.RemoteLeafOpts{remote}
sl, _ := RunServer(&o), &o
defer sl.Shutdown()