[Fixed] Profiling and Monitoring timeout issues

The http servers for those two were recently modified to set
a ReadTimeout and WriteTimeout. The WriteTimeout specifically
caused issues for Profiling since it is common to ask sampling
of several seconds. Pprof code would reject the request if it
detected that http server's WriteTimeout was more than sampling
in request.
For monitoring, any situation that would cause the monitoring code
to take more than 2 seconds to gather information (could be due
to locking, amount of objects to return, time required for sorting,
etc..) would also cause cURL to return empty response or WebBrowser
to fail to display the page.

Resolves #600
This commit is contained in:
Ivan Kozlovic
2017-11-08 14:52:27 -07:00
parent 92c4481d1b
commit 22cff99e58
4 changed files with 83 additions and 22 deletions

View File

@@ -85,6 +85,11 @@ type Server struct {
trace int32
debug int32
}
// Used by tests to check that http.Servers do
// not set any timeout.
monitoringServer *http.Server
profilingServer *http.Server
}
// Make sure all are 64bits for atomic use
@@ -477,20 +482,24 @@ func (s *Server) StartProfiler() {
srv := &http.Server{
Addr: hp,
Handler: http.DefaultServeMux,
ReadTimeout: 2 * time.Second,
WriteTimeout: 2 * time.Second,
MaxHeaderBytes: 1 << 20,
}
s.mu.Lock()
s.profiler = l
s.profilingServer = srv
s.mu.Unlock()
go func() {
// if this errors out, it's probably because the server is being shutdown
err := srv.Serve(l)
if err != nil {
s.Fatalf("error starting profiler: %s", err)
s.mu.Lock()
shutdown := s.shutdown
s.mu.Unlock()
if !shutdown {
s.Fatalf("error starting profiler: %s", err)
}
}
s.done <- true
}()
@@ -606,16 +615,18 @@ func (s *Server) startMonitoring(secure bool) error {
// Stacksz
mux.HandleFunc(StackszPath, s.HandleStacksz)
// Do not set a WriteTimeout because it could cause cURL/browser
// to return empty response or unable to display page if the
// server needs more time to build the response.
srv := &http.Server{
Addr: hp,
Handler: mux,
ReadTimeout: 2 * time.Second,
WriteTimeout: 2 * time.Second,
MaxHeaderBytes: 1 << 20,
}
s.mu.Lock()
s.http = httpListener
s.httpHandler = mux
s.monitoringServer = srv
s.mu.Unlock()
go func() {
@@ -919,7 +930,7 @@ func (s *Server) MonitorAddr() *net.TCPAddr {
return s.http.Addr().(*net.TCPAddr)
}
// RouteAddr returns the net.Addr object for the route listener.
// ClusterAddr returns the net.Addr object for the route listener.
func (s *Server) ClusterAddr() *net.TCPAddr {
s.mu.Lock()
defer s.mu.Unlock()
@@ -929,6 +940,16 @@ func (s *Server) ClusterAddr() *net.TCPAddr {
return s.routeListener.Addr().(*net.TCPAddr)
}
// ProfilerAddr returns the net.Addr object for the route listener.
func (s *Server) ProfilerAddr() *net.TCPAddr {
s.mu.Lock()
defer s.mu.Unlock()
if s.profiler == nil {
return nil
}
return s.profiler.Addr().(*net.TCPAddr)
}
// ReadyForConnections returns `true` if the server is ready to accept client
// and, if routing is enabled, route connections. If after the duration
// `dur` the server is still not ready, returns `false`.

View File

@@ -19,7 +19,6 @@ func DefaultOptions() *Options {
Port: -1,
HTTPPort: -1,
Cluster: ClusterOpts{Port: -1},
ProfPort: -1,
NoLog: true,
NoSigs: true,
Debug: true,
@@ -442,3 +441,51 @@ func TestCustomRouterAuthentication(t *testing.T) {
}
}
func TestMonitoringNoTimeout(t *testing.T) {
s := runMonitorServer()
defer s.Shutdown()
s.mu.Lock()
srv := s.monitoringServer
s.mu.Unlock()
if srv == nil {
t.Fatalf("Monitoring server not set")
}
if srv.ReadTimeout != 0 {
t.Fatalf("ReadTimeout should not be set, was set to %v", srv.ReadTimeout)
}
if srv.WriteTimeout != 0 {
t.Fatalf("WriteTimeout should not be set, was set to %v", srv.WriteTimeout)
}
}
func TestProfilingNoTimeout(t *testing.T) {
opts := DefaultOptions()
opts.ProfPort = -1
s := RunServer(opts)
defer s.Shutdown()
paddr := s.ProfilerAddr()
if paddr == nil {
t.Fatalf("Profiler not started")
}
pport := paddr.Port
if pport <= 0 {
t.Fatalf("Expected profiler port to be set, got %v", pport)
}
s.mu.Lock()
srv := s.profilingServer
s.mu.Unlock()
if srv == nil {
t.Fatalf("Profiling server not set")
}
if srv.ReadTimeout != 0 {
t.Fatalf("ReadTimeout should not be set, was set to %v", srv.ReadTimeout)
}
if srv.WriteTimeout != 0 {
t.Fatalf("WriteTimeout should not be set, was set to %v", srv.WriteTimeout)
}
}

View File

@@ -71,7 +71,7 @@ func runMonitorServerNoHTTPPort() *server.Server {
}
func resetPreviousHTTPConnections() {
http.DefaultTransport = &http.Transport{}
http.DefaultTransport.(*http.Transport).CloseIdleConnections()
}
// Make sure that we do not run the http server for monitoring unless asked.

View File

@@ -289,8 +289,6 @@ func TestStressSeedSolicitWorks(t *testing.T) {
var err error
maxTime := time.Now().Add(5 * time.Second)
for time.Now().Before(maxTime) {
resetPreviousHTTPConnections()
for j := 0; j < len(serversInfo); j++ {
err = checkConnected(t, serversInfo, j, true)
// If error, start this for loop from beginning
@@ -430,8 +428,6 @@ func TestStressChainedSolicitWorks(t *testing.T) {
var err error
maxTime := time.Now().Add(5 * time.Second)
for time.Now().Before(maxTime) {
resetPreviousHTTPConnections()
for j := 0; j < len(serversInfo); j++ {
err = checkConnected(t, serversInfo, j, false)
// If error, start this for loop from beginning
@@ -554,25 +550,22 @@ func expectRidsNoFatal(t *testing.T, direct bool, rz *server.Routez, rids []stri
// Helper to easily grab routez info.
func readHTTPRoutez(t *testing.T, url string) *server.Routez {
resetPreviousHTTPConnections()
resp, err := http.Get(url + "routez")
if err != nil {
t.Fatalf("Expected no error: Got %v\n", err)
}
if resp.StatusCode != 200 {
// Do one retry - FIXME(dlc) - Why does this fail when running the solicit tests b2b?
resp, _ = http.Get(url + "routez")
if resp.StatusCode != 200 {
t.Fatalf("Expected a 200 response, got %d\n", resp.StatusCode)
}
stackFatalf(t, "Expected no error: Got %v\n", err)
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
stackFatalf(t, "Expected a 200 response, got %d\n", resp.StatusCode)
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
t.Fatalf("Got an error reading the body: %v\n", err)
stackFatalf(t, "Got an error reading the body: %v\n", err)
}
r := server.Routez{}
if err := json.Unmarshal(body, &r); err != nil {
t.Fatalf("Got an error unmarshalling the body: %v\n", err)
stackFatalf(t, "Got an error unmarshalling the body: %v\n", err)
}
return &r
}