mirror of
https://github.com/gogrlx/nats-server.git
synced 2026-04-14 10:10:42 -07:00
[Fixed] Profiling and Monitoring timeout issues
The http servers for those two were recently modified to set a ReadTimeout and WriteTimeout. The WriteTimeout specifically caused issues for Profiling since it is common to ask sampling of several seconds. Pprof code would reject the request if it detected that http server's WriteTimeout was more than sampling in request. For monitoring, any situation that would cause the monitoring code to take more than 2 seconds to gather information (could be due to locking, amount of objects to return, time required for sorting, etc..) would also cause cURL to return empty response or WebBrowser to fail to display the page. Resolves #600
This commit is contained in:
@@ -85,6 +85,11 @@ type Server struct {
|
||||
trace int32
|
||||
debug int32
|
||||
}
|
||||
|
||||
// Used by tests to check that http.Servers do
|
||||
// not set any timeout.
|
||||
monitoringServer *http.Server
|
||||
profilingServer *http.Server
|
||||
}
|
||||
|
||||
// Make sure all are 64bits for atomic use
|
||||
@@ -477,20 +482,24 @@ func (s *Server) StartProfiler() {
|
||||
srv := &http.Server{
|
||||
Addr: hp,
|
||||
Handler: http.DefaultServeMux,
|
||||
ReadTimeout: 2 * time.Second,
|
||||
WriteTimeout: 2 * time.Second,
|
||||
MaxHeaderBytes: 1 << 20,
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
s.profiler = l
|
||||
s.profilingServer = srv
|
||||
s.mu.Unlock()
|
||||
|
||||
go func() {
|
||||
// if this errors out, it's probably because the server is being shutdown
|
||||
err := srv.Serve(l)
|
||||
if err != nil {
|
||||
s.Fatalf("error starting profiler: %s", err)
|
||||
s.mu.Lock()
|
||||
shutdown := s.shutdown
|
||||
s.mu.Unlock()
|
||||
if !shutdown {
|
||||
s.Fatalf("error starting profiler: %s", err)
|
||||
}
|
||||
}
|
||||
s.done <- true
|
||||
}()
|
||||
@@ -606,16 +615,18 @@ func (s *Server) startMonitoring(secure bool) error {
|
||||
// Stacksz
|
||||
mux.HandleFunc(StackszPath, s.HandleStacksz)
|
||||
|
||||
// Do not set a WriteTimeout because it could cause cURL/browser
|
||||
// to return empty response or unable to display page if the
|
||||
// server needs more time to build the response.
|
||||
srv := &http.Server{
|
||||
Addr: hp,
|
||||
Handler: mux,
|
||||
ReadTimeout: 2 * time.Second,
|
||||
WriteTimeout: 2 * time.Second,
|
||||
MaxHeaderBytes: 1 << 20,
|
||||
}
|
||||
s.mu.Lock()
|
||||
s.http = httpListener
|
||||
s.httpHandler = mux
|
||||
s.monitoringServer = srv
|
||||
s.mu.Unlock()
|
||||
|
||||
go func() {
|
||||
@@ -919,7 +930,7 @@ func (s *Server) MonitorAddr() *net.TCPAddr {
|
||||
return s.http.Addr().(*net.TCPAddr)
|
||||
}
|
||||
|
||||
// RouteAddr returns the net.Addr object for the route listener.
|
||||
// ClusterAddr returns the net.Addr object for the route listener.
|
||||
func (s *Server) ClusterAddr() *net.TCPAddr {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
@@ -929,6 +940,16 @@ func (s *Server) ClusterAddr() *net.TCPAddr {
|
||||
return s.routeListener.Addr().(*net.TCPAddr)
|
||||
}
|
||||
|
||||
// ProfilerAddr returns the net.Addr object for the route listener.
|
||||
func (s *Server) ProfilerAddr() *net.TCPAddr {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.profiler == nil {
|
||||
return nil
|
||||
}
|
||||
return s.profiler.Addr().(*net.TCPAddr)
|
||||
}
|
||||
|
||||
// ReadyForConnections returns `true` if the server is ready to accept client
|
||||
// and, if routing is enabled, route connections. If after the duration
|
||||
// `dur` the server is still not ready, returns `false`.
|
||||
|
||||
@@ -19,7 +19,6 @@ func DefaultOptions() *Options {
|
||||
Port: -1,
|
||||
HTTPPort: -1,
|
||||
Cluster: ClusterOpts{Port: -1},
|
||||
ProfPort: -1,
|
||||
NoLog: true,
|
||||
NoSigs: true,
|
||||
Debug: true,
|
||||
@@ -442,3 +441,51 @@ func TestCustomRouterAuthentication(t *testing.T) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestMonitoringNoTimeout(t *testing.T) {
|
||||
s := runMonitorServer()
|
||||
defer s.Shutdown()
|
||||
|
||||
s.mu.Lock()
|
||||
srv := s.monitoringServer
|
||||
s.mu.Unlock()
|
||||
|
||||
if srv == nil {
|
||||
t.Fatalf("Monitoring server not set")
|
||||
}
|
||||
if srv.ReadTimeout != 0 {
|
||||
t.Fatalf("ReadTimeout should not be set, was set to %v", srv.ReadTimeout)
|
||||
}
|
||||
if srv.WriteTimeout != 0 {
|
||||
t.Fatalf("WriteTimeout should not be set, was set to %v", srv.WriteTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProfilingNoTimeout(t *testing.T) {
|
||||
opts := DefaultOptions()
|
||||
opts.ProfPort = -1
|
||||
s := RunServer(opts)
|
||||
defer s.Shutdown()
|
||||
|
||||
paddr := s.ProfilerAddr()
|
||||
if paddr == nil {
|
||||
t.Fatalf("Profiler not started")
|
||||
}
|
||||
pport := paddr.Port
|
||||
if pport <= 0 {
|
||||
t.Fatalf("Expected profiler port to be set, got %v", pport)
|
||||
}
|
||||
s.mu.Lock()
|
||||
srv := s.profilingServer
|
||||
s.mu.Unlock()
|
||||
|
||||
if srv == nil {
|
||||
t.Fatalf("Profiling server not set")
|
||||
}
|
||||
if srv.ReadTimeout != 0 {
|
||||
t.Fatalf("ReadTimeout should not be set, was set to %v", srv.ReadTimeout)
|
||||
}
|
||||
if srv.WriteTimeout != 0 {
|
||||
t.Fatalf("WriteTimeout should not be set, was set to %v", srv.WriteTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ func runMonitorServerNoHTTPPort() *server.Server {
|
||||
}
|
||||
|
||||
func resetPreviousHTTPConnections() {
|
||||
http.DefaultTransport = &http.Transport{}
|
||||
http.DefaultTransport.(*http.Transport).CloseIdleConnections()
|
||||
}
|
||||
|
||||
// Make sure that we do not run the http server for monitoring unless asked.
|
||||
|
||||
@@ -289,8 +289,6 @@ func TestStressSeedSolicitWorks(t *testing.T) {
|
||||
var err error
|
||||
maxTime := time.Now().Add(5 * time.Second)
|
||||
for time.Now().Before(maxTime) {
|
||||
resetPreviousHTTPConnections()
|
||||
|
||||
for j := 0; j < len(serversInfo); j++ {
|
||||
err = checkConnected(t, serversInfo, j, true)
|
||||
// If error, start this for loop from beginning
|
||||
@@ -430,8 +428,6 @@ func TestStressChainedSolicitWorks(t *testing.T) {
|
||||
var err error
|
||||
maxTime := time.Now().Add(5 * time.Second)
|
||||
for time.Now().Before(maxTime) {
|
||||
resetPreviousHTTPConnections()
|
||||
|
||||
for j := 0; j < len(serversInfo); j++ {
|
||||
err = checkConnected(t, serversInfo, j, false)
|
||||
// If error, start this for loop from beginning
|
||||
@@ -554,25 +550,22 @@ func expectRidsNoFatal(t *testing.T, direct bool, rz *server.Routez, rids []stri
|
||||
|
||||
// Helper to easily grab routez info.
|
||||
func readHTTPRoutez(t *testing.T, url string) *server.Routez {
|
||||
resetPreviousHTTPConnections()
|
||||
resp, err := http.Get(url + "routez")
|
||||
if err != nil {
|
||||
t.Fatalf("Expected no error: Got %v\n", err)
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
// Do one retry - FIXME(dlc) - Why does this fail when running the solicit tests b2b?
|
||||
resp, _ = http.Get(url + "routez")
|
||||
if resp.StatusCode != 200 {
|
||||
t.Fatalf("Expected a 200 response, got %d\n", resp.StatusCode)
|
||||
}
|
||||
stackFatalf(t, "Expected no error: Got %v\n", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != 200 {
|
||||
stackFatalf(t, "Expected a 200 response, got %d\n", resp.StatusCode)
|
||||
}
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("Got an error reading the body: %v\n", err)
|
||||
stackFatalf(t, "Got an error reading the body: %v\n", err)
|
||||
}
|
||||
r := server.Routez{}
|
||||
if err := json.Unmarshal(body, &r); err != nil {
|
||||
t.Fatalf("Got an error unmarshalling the body: %v\n", err)
|
||||
stackFatalf(t, "Got an error unmarshalling the body: %v\n", err)
|
||||
}
|
||||
return &r
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user