From d98d51c8cc7f51b3691bfa28921491769736feb4 Mon Sep 17 00:00:00 2001 From: Ivan Kozlovic Date: Wed, 15 Aug 2018 18:04:07 -0600 Subject: [PATCH] [FIXED] Possible cluster `Authorization Error` during config reload When changing something in the cluster, such as Timeout and doing a config reload, the route could be closed with an `Authorization Error` report. Moreover, the route would not try to reconnect, even if specified as an explicit route. There were 2 issues: - When checking if a solicited route is still valid, we need to check the Routes' URL against the URL that we try to connect to but not compare the pointers, but either do a reflect deep equal, or compare their String representation (this is what I do in the PR). - We should check route authorization only if this is an accepted route, not an explicit one. The reason is that we a server explicitly connect to another server, it does not get the remote server's username and password. So the check would always fail. Note: It is possible that a config reload even without any change in the cluster triggers the code checking if routes are properly authorized, and that happens if there is TLS specified. When the reload code checks if config has changed, the TLSConfig between the old and new seem to indicate a change, eventhough there is apparently none. Another reload does not detect a change. I suspect some internal state in TLSConfig that causes the reflect.DeepEqual() to report a difference. Note2: This commit also contains fixes to regex that staticcheck would otherwise complain about (they did not have any special character), and I have removed printing the usage on startup when getting an error. The usage is still correctly printed if passing a parameter that is unknown. Resolves #719 Signed-off-by: Ivan Kozlovic --- main.go | 2 +- server/reload.go | 11 +++-- server/reload_test.go | 99 +++++++++++++++++++++++++++++++++++++++++-- server/route.go | 2 +- test/test.go | 4 +- 5 files changed, 107 insertions(+), 11 deletions(-) diff --git a/main.go b/main.go index 1feb0930..4553336c 100644 --- a/main.go +++ b/main.go @@ -86,7 +86,7 @@ func main() { fs.Usage, server.PrintTLSHelpAndDie) if err != nil { - server.PrintAndDie(err.Error() + "\n" + usageStr) + server.PrintAndDie(err.Error()) } // Create the server with appropriate options. diff --git a/server/reload.go b/server/reload.go index 8839cf99..682f5880 100644 --- a/server/reload.go +++ b/server/reload.go @@ -656,11 +656,14 @@ func (s *Server) reloadAuthorization() { s.removeUnauthorizedSubs(client) } - for _, client := range routes { + for _, route := range routes { // Disconnect any unauthorized routes. - if !s.isRouterAuthorized(client) { - client.setRouteNoReconnectOnClose() - client.authViolation() + // Do this only for route that were accepted, not initiated + // because in the later case, we don't have the user name/password + // of the remote server. + if !route.isSolicitedRoute() && !s.isRouterAuthorized(route) { + route.setRouteNoReconnectOnClose() + route.authViolation() } } } diff --git a/server/reload_test.go b/server/reload_test.go index eda457a0..d06cf6a8 100644 --- a/server/reload_test.go +++ b/server/reload_test.go @@ -390,10 +390,12 @@ func TestConfigReloadEnableTLS(t *testing.T) { t.Fatalf("Error reloading config: %v", err) } - // Ensure connecting fails. - if _, err := nats.Connect(addr); err == nil { - t.Fatal("Expected connect to fail") + // Ensure connecting is OK even without Secure (the client is now switching automatically). + nc, err = nats.Connect(addr) + if err != nil { + t.Fatalf("Error creating client: %v", err) } + nc.Close() // Ensure connecting succeeds when using secure. nc, err = nats.Connect(addr, nats.Secure()) @@ -1862,6 +1864,97 @@ func TestConfigReloadRotateFiles(t *testing.T) { } } +func createConfFile(t *testing.T, content []byte) string { + t.Helper() + conf, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("Error creating conf file: %v", err) + } + fName := conf.Name() + conf.Close() + if err := ioutil.WriteFile(fName, content, 0666); err != nil { + os.Remove(fName) + t.Fatalf("Error writing conf file: %v", err) + } + return fName +} + +func TestConfigReloadClusterWorks(t *testing.T) { + confBTemplate := ` + listen: -1 + cluster: { + listen: 127.0.0.1:7244 + authorization { + user: ruser + password: pwd + timeout: %d + } + routes = [ + nats-route://ruser:pwd@127.0.0.1:7246 + ] + }` + confB := createConfFile(t, []byte(fmt.Sprintf(confBTemplate, 3))) + defer os.Remove(confB) + + confATemplate := ` + listen: -1 + cluster: { + listen: 127.0.0.1:7246 + authorization { + user: ruser + password: pwd + timeout: %d + } + routes = [ + nats-route://ruser:pwd@127.0.0.1:7244 + ] + }` + confA := createConfFile(t, []byte(fmt.Sprintf(confATemplate, 3))) + defer os.Remove(confA) + + srvb, _ := RunServerWithConfig(confB) + defer srvb.Shutdown() + + srva, _ := RunServerWithConfig(confA) + defer srva.Shutdown() + + // Wait for the cluster to form and capture the connection IDs of each route + checkClusterFormed(t, srva, srvb) + + getCID := func(s *Server) uint64 { + s.mu.Lock() + defer s.mu.Unlock() + for _, r := range s.routes { + return r.cid + } + return 0 + } + acid := getCID(srva) + bcid := getCID(srvb) + + // Update auth timeout to force a check of the connected route auth + reloadUpdateConfig(t, srvb, confB, fmt.Sprintf(confBTemplate, 5)) + reloadUpdateConfig(t, srva, confA, fmt.Sprintf(confATemplate, 5)) + + // Wait a little bit to ensure that there is no issue with connection + // breaking at this point (this was an issue before). + time.Sleep(100 * time.Millisecond) + + // Cluster should still exist + checkClusterFormed(t, srva, srvb) + + // Check that routes were not re-created + newacid := getCID(srva) + newbcid := getCID(srvb) + + if newacid != acid { + t.Fatalf("Expected server A route ID to be %v, got %v", acid, newacid) + } + if newbcid != bcid { + t.Fatalf("Expected server B route ID to be %v, got %v", bcid, newbcid) + } +} + func runServerWithSymlinkConfig(t *testing.T, symlinkName, configName string) (*Server, *Options, string) { t.Helper() opts, config := newOptionsWithSymlinkConfig(t, symlinkName, configName) diff --git a/server/route.go b/server/route.go index c12a713d..bb165940 100644 --- a/server/route.go +++ b/server/route.go @@ -1032,7 +1032,7 @@ func (s *Server) reConnectToRoute(rURL *url.URL, rtype RouteType) { // Checks to make sure the route is still valid. func (s *Server) routeStillValid(rURL *url.URL) bool { for _, ri := range s.getOpts().Routes { - if ri == rURL { + if ri.String() == rURL.String() { return true } } diff --git a/test/test.go b/test/test.go index 43684528..adaeaa13 100644 --- a/test/test.go +++ b/test/test.go @@ -245,8 +245,8 @@ func sendProto(t tLogger, c net.Conn, op string) { var ( infoRe = regexp.MustCompile(`INFO\s+([^\r\n]+)\r\n`) - pingRe = regexp.MustCompile(`PING\r\n`) - pongRe = regexp.MustCompile(`PONG\r\n`) + pingRe = regexp.MustCompile(`^PING\r\n`) + pongRe = regexp.MustCompile(`^PONG\r\n`) msgRe = regexp.MustCompile(`(?:(?:MSG\s+([^\s]+)\s+([^\s]+)\s+(([^\s]+)[^\S\r\n]+)?(\d+)\s*\r\n([^\\r\\n]*?)\r\n)+?)`) okRe = regexp.MustCompile(`\A\+OK\r\n`) errRe = regexp.MustCompile(`\A\-ERR\s+([^\r\n]+)\r\n`)