Merge pull request #603 from nats-io/fix_http_servers_timeout

[FIXED] Profiling and Monitoring timeout issues
This commit is contained in:
Ivan Kozlovic
2017-11-17 12:17:20 -07:00
committed by GitHub
4 changed files with 83 additions and 22 deletions

View File

@@ -85,6 +85,11 @@ type Server struct {
trace int32
debug int32
}
// Used by tests to check that http.Servers do
// not set any timeout.
monitoringServer *http.Server
profilingServer *http.Server
}
// Make sure all are 64bits for atomic use
@@ -477,20 +482,24 @@ func (s *Server) StartProfiler() {
srv := &http.Server{
Addr: hp,
Handler: http.DefaultServeMux,
ReadTimeout: 2 * time.Second,
WriteTimeout: 2 * time.Second,
MaxHeaderBytes: 1 << 20,
}
s.mu.Lock()
s.profiler = l
s.profilingServer = srv
s.mu.Unlock()
go func() {
// if this errors out, it's probably because the server is being shutdown
err := srv.Serve(l)
if err != nil {
s.Fatalf("error starting profiler: %s", err)
s.mu.Lock()
shutdown := s.shutdown
s.mu.Unlock()
if !shutdown {
s.Fatalf("error starting profiler: %s", err)
}
}
s.done <- true
}()
@@ -606,16 +615,18 @@ func (s *Server) startMonitoring(secure bool) error {
// Stacksz
mux.HandleFunc(StackszPath, s.HandleStacksz)
// Do not set a WriteTimeout because it could cause cURL/browser
// to return empty response or unable to display page if the
// server needs more time to build the response.
srv := &http.Server{
Addr: hp,
Handler: mux,
ReadTimeout: 2 * time.Second,
WriteTimeout: 2 * time.Second,
MaxHeaderBytes: 1 << 20,
}
s.mu.Lock()
s.http = httpListener
s.httpHandler = mux
s.monitoringServer = srv
s.mu.Unlock()
go func() {
@@ -919,7 +930,7 @@ func (s *Server) MonitorAddr() *net.TCPAddr {
return s.http.Addr().(*net.TCPAddr)
}
// RouteAddr returns the net.Addr object for the route listener.
// ClusterAddr returns the net.Addr object for the route listener.
func (s *Server) ClusterAddr() *net.TCPAddr {
s.mu.Lock()
defer s.mu.Unlock()
@@ -929,6 +940,16 @@ func (s *Server) ClusterAddr() *net.TCPAddr {
return s.routeListener.Addr().(*net.TCPAddr)
}
// ProfilerAddr returns the net.Addr object for the route listener.
func (s *Server) ProfilerAddr() *net.TCPAddr {
s.mu.Lock()
defer s.mu.Unlock()
if s.profiler == nil {
return nil
}
return s.profiler.Addr().(*net.TCPAddr)
}
// ReadyForConnections returns `true` if the server is ready to accept client
// and, if routing is enabled, route connections. If after the duration
// `dur` the server is still not ready, returns `false`.

View File

@@ -19,7 +19,6 @@ func DefaultOptions() *Options {
Port: -1,
HTTPPort: -1,
Cluster: ClusterOpts{Port: -1},
ProfPort: -1,
NoLog: true,
NoSigs: true,
Debug: true,
@@ -442,3 +441,51 @@ func TestCustomRouterAuthentication(t *testing.T) {
}
}
func TestMonitoringNoTimeout(t *testing.T) {
s := runMonitorServer()
defer s.Shutdown()
s.mu.Lock()
srv := s.monitoringServer
s.mu.Unlock()
if srv == nil {
t.Fatalf("Monitoring server not set")
}
if srv.ReadTimeout != 0 {
t.Fatalf("ReadTimeout should not be set, was set to %v", srv.ReadTimeout)
}
if srv.WriteTimeout != 0 {
t.Fatalf("WriteTimeout should not be set, was set to %v", srv.WriteTimeout)
}
}
func TestProfilingNoTimeout(t *testing.T) {
opts := DefaultOptions()
opts.ProfPort = -1
s := RunServer(opts)
defer s.Shutdown()
paddr := s.ProfilerAddr()
if paddr == nil {
t.Fatalf("Profiler not started")
}
pport := paddr.Port
if pport <= 0 {
t.Fatalf("Expected profiler port to be set, got %v", pport)
}
s.mu.Lock()
srv := s.profilingServer
s.mu.Unlock()
if srv == nil {
t.Fatalf("Profiling server not set")
}
if srv.ReadTimeout != 0 {
t.Fatalf("ReadTimeout should not be set, was set to %v", srv.ReadTimeout)
}
if srv.WriteTimeout != 0 {
t.Fatalf("WriteTimeout should not be set, was set to %v", srv.WriteTimeout)
}
}

View File

@@ -71,7 +71,7 @@ func runMonitorServerNoHTTPPort() *server.Server {
}
func resetPreviousHTTPConnections() {
http.DefaultTransport = &http.Transport{}
http.DefaultTransport.(*http.Transport).CloseIdleConnections()
}
// Make sure that we do not run the http server for monitoring unless asked.

View File

@@ -289,8 +289,6 @@ func TestStressSeedSolicitWorks(t *testing.T) {
var err error
maxTime := time.Now().Add(5 * time.Second)
for time.Now().Before(maxTime) {
resetPreviousHTTPConnections()
for j := 0; j < len(serversInfo); j++ {
err = checkConnected(t, serversInfo, j, true)
// If error, start this for loop from beginning
@@ -430,8 +428,6 @@ func TestStressChainedSolicitWorks(t *testing.T) {
var err error
maxTime := time.Now().Add(5 * time.Second)
for time.Now().Before(maxTime) {
resetPreviousHTTPConnections()
for j := 0; j < len(serversInfo); j++ {
err = checkConnected(t, serversInfo, j, false)
// If error, start this for loop from beginning
@@ -554,25 +550,22 @@ func expectRidsNoFatal(t *testing.T, direct bool, rz *server.Routez, rids []stri
// Helper to easily grab routez info.
func readHTTPRoutez(t *testing.T, url string) *server.Routez {
resetPreviousHTTPConnections()
resp, err := http.Get(url + "routez")
if err != nil {
t.Fatalf("Expected no error: Got %v\n", err)
}
if resp.StatusCode != 200 {
// Do one retry - FIXME(dlc) - Why does this fail when running the solicit tests b2b?
resp, _ = http.Get(url + "routez")
if resp.StatusCode != 200 {
t.Fatalf("Expected a 200 response, got %d\n", resp.StatusCode)
}
stackFatalf(t, "Expected no error: Got %v\n", err)
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
stackFatalf(t, "Expected a 200 response, got %d\n", resp.StatusCode)
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
t.Fatalf("Got an error reading the body: %v\n", err)
stackFatalf(t, "Got an error reading the body: %v\n", err)
}
r := server.Routez{}
if err := json.Unmarshal(body, &r); err != nil {
t.Fatalf("Got an error unmarshalling the body: %v\n", err)
stackFatalf(t, "Got an error unmarshalling the body: %v\n", err)
}
return &r
}