Merge pull request #603 from nats-io/fix_http_servers_timeout

[FIXED] Profiling and Monitoring timeout issues
2026-04-17 11:24:44 -07:00 · 2017-11-17 12:17:20 -07:00
parent 92c4481d1b 22cff99e58
commit 99f718fd9e
4 changed files with 83 additions and 22 deletions
--- a/server/server.go
+++ b/server/server.go
@@ -85,6 +85,11 @@ type Server struct {
 		trace  int32
 		debug  int32
 	}
+
+	// Used by tests to check that http.Servers do
+	// not set any timeout.
+	monitoringServer *http.Server
+	profilingServer  *http.Server
 }

 // Make sure all are 64bits for atomic use
@@ -477,20 +482,24 @@ func (s *Server) StartProfiler() {
 	srv := &http.Server{
 		Addr:           hp,
 		Handler:        http.DefaultServeMux,
-		ReadTimeout:    2 * time.Second,
-		WriteTimeout:   2 * time.Second,
 		MaxHeaderBytes: 1 << 20,
 	}

 	s.mu.Lock()
 	s.profiler = l
+	s.profilingServer = srv
 	s.mu.Unlock()

 	go func() {
 		// if this errors out, it's probably because the server is being shutdown
 		err := srv.Serve(l)
 		if err != nil {
-			s.Fatalf("error starting profiler: %s", err)
+			s.mu.Lock()
+			shutdown := s.shutdown
+			s.mu.Unlock()
+			if !shutdown {
+				s.Fatalf("error starting profiler: %s", err)
+			}
 		}
 		s.done <- true
 	}()
@@ -606,16 +615,18 @@ func (s *Server) startMonitoring(secure bool) error {
 	// Stacksz
 	mux.HandleFunc(StackszPath, s.HandleStacksz)

+	// Do not set a WriteTimeout because it could cause cURL/browser
+	// to return empty response or unable to display page if the
+	// server needs more time to build the response.
 	srv := &http.Server{
 		Addr:           hp,
 		Handler:        mux,
-		ReadTimeout:    2 * time.Second,
-		WriteTimeout:   2 * time.Second,
 		MaxHeaderBytes: 1 << 20,
 	}
 	s.mu.Lock()
 	s.http = httpListener
 	s.httpHandler = mux
+	s.monitoringServer = srv
 	s.mu.Unlock()

 	go func() {
@@ -919,7 +930,7 @@ func (s *Server) MonitorAddr() *net.TCPAddr {
 	return s.http.Addr().(*net.TCPAddr)
 }

-// RouteAddr returns the net.Addr object for the route listener.
+// ClusterAddr returns the net.Addr object for the route listener.
 func (s *Server) ClusterAddr() *net.TCPAddr {
 	s.mu.Lock()
 	defer s.mu.Unlock()
@@ -929,6 +940,16 @@ func (s *Server) ClusterAddr() *net.TCPAddr {
 	return s.routeListener.Addr().(*net.TCPAddr)
 }

+// ProfilerAddr returns the net.Addr object for the route listener.
+func (s *Server) ProfilerAddr() *net.TCPAddr {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.profiler == nil {
+		return nil
+	}
+	return s.profiler.Addr().(*net.TCPAddr)
+}
+
 // ReadyForConnections returns `true` if the server is ready to accept client
 // and, if routing is enabled, route connections. If after the duration
 // `dur` the server is still not ready, returns `false`.
--- a/server/server_test.go
+++ b/server/server_test.go
@@ -19,7 +19,6 @@ func DefaultOptions() *Options {
 		Port:     -1,
 		HTTPPort: -1,
 		Cluster:  ClusterOpts{Port: -1},
-		ProfPort: -1,
 		NoLog:    true,
 		NoSigs:   true,
 		Debug:    true,
@@ -442,3 +441,51 @@ func TestCustomRouterAuthentication(t *testing.T) {
 	}

 }
+
+func TestMonitoringNoTimeout(t *testing.T) {
+	s := runMonitorServer()
+	defer s.Shutdown()
+
+	s.mu.Lock()
+	srv := s.monitoringServer
+	s.mu.Unlock()
+
+	if srv == nil {
+		t.Fatalf("Monitoring server not set")
+	}
+	if srv.ReadTimeout != 0 {
+		t.Fatalf("ReadTimeout should not be set, was set to %v", srv.ReadTimeout)
+	}
+	if srv.WriteTimeout != 0 {
+		t.Fatalf("WriteTimeout should not be set, was set to %v", srv.WriteTimeout)
+	}
+}
+
+func TestProfilingNoTimeout(t *testing.T) {
+	opts := DefaultOptions()
+	opts.ProfPort = -1
+	s := RunServer(opts)
+	defer s.Shutdown()
+
+	paddr := s.ProfilerAddr()
+	if paddr == nil {
+		t.Fatalf("Profiler not started")
+	}
+	pport := paddr.Port
+	if pport <= 0 {
+		t.Fatalf("Expected profiler port to be set, got %v", pport)
+	}
+	s.mu.Lock()
+	srv := s.profilingServer
+	s.mu.Unlock()
+
+	if srv == nil {
+		t.Fatalf("Profiling server not set")
+	}
+	if srv.ReadTimeout != 0 {
+		t.Fatalf("ReadTimeout should not be set, was set to %v", srv.ReadTimeout)
+	}
+	if srv.WriteTimeout != 0 {
+		t.Fatalf("WriteTimeout should not be set, was set to %v", srv.WriteTimeout)
+	}
+}
--- a/test/monitor_test.go
+++ b/test/monitor_test.go
@@ -71,7 +71,7 @@ func runMonitorServerNoHTTPPort() *server.Server {
 }

 func resetPreviousHTTPConnections() {
-	http.DefaultTransport = &http.Transport{}
+	http.DefaultTransport.(*http.Transport).CloseIdleConnections()
 }

 // Make sure that we do not run the http server for monitoring unless asked.
--- a/test/route_discovery_test.go
+++ b/test/route_discovery_test.go
@@ -289,8 +289,6 @@ func TestStressSeedSolicitWorks(t *testing.T) {
 			var err error
 			maxTime := time.Now().Add(5 * time.Second)
 			for time.Now().Before(maxTime) {
-				resetPreviousHTTPConnections()
-
 				for j := 0; j < len(serversInfo); j++ {
 					err = checkConnected(t, serversInfo, j, true)
 					// If error, start this for loop from beginning
@@ -430,8 +428,6 @@ func TestStressChainedSolicitWorks(t *testing.T) {
 			var err error
 			maxTime := time.Now().Add(5 * time.Second)
 			for time.Now().Before(maxTime) {
-				resetPreviousHTTPConnections()
-
 				for j := 0; j < len(serversInfo); j++ {
 					err = checkConnected(t, serversInfo, j, false)
 					// If error, start this for loop from beginning
@@ -554,25 +550,22 @@ func expectRidsNoFatal(t *testing.T, direct bool, rz *server.Routez, rids []stri

 // Helper to easily grab routez info.
 func readHTTPRoutez(t *testing.T, url string) *server.Routez {
+	resetPreviousHTTPConnections()
 	resp, err := http.Get(url + "routez")
 	if err != nil {
-		t.Fatalf("Expected no error: Got %v\n", err)
-	}
-	if resp.StatusCode != 200 {
-		// Do one retry - FIXME(dlc) - Why does this fail when running the solicit tests b2b?
-		resp, _ = http.Get(url + "routez")
-		if resp.StatusCode != 200 {
-			t.Fatalf("Expected a 200 response, got %d\n", resp.StatusCode)
-		}
+		stackFatalf(t, "Expected no error: Got %v\n", err)
 	}
 	defer resp.Body.Close()
+	if resp.StatusCode != 200 {
+		stackFatalf(t, "Expected a 200 response, got %d\n", resp.StatusCode)
+	}
 	body, err := ioutil.ReadAll(resp.Body)
 	if err != nil {
-		t.Fatalf("Got an error reading the body: %v\n", err)
+		stackFatalf(t, "Got an error reading the body: %v\n", err)
 	}
 	r := server.Routez{}
 	if err := json.Unmarshal(body, &r); err != nil {
-		t.Fatalf("Got an error unmarshalling the body: %v\n", err)
+		stackFatalf(t, "Got an error unmarshalling the body: %v\n", err)
 	}
 	return &r
 }