mirror of
https://github.com/gogrlx/nats-server.git
synced 2026-04-02 11:48:43 -07:00
We used these in tests and for experimenting with sandboxed environments like the demo network. Signed-off-by: Derek Collison <derek@nats.io>
3686 lines
99 KiB
Go
3686 lines
99 KiB
Go
// Copyright 2012-2022 The NATS Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package server
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/tls"
|
|
"encoding/json"
|
|
"errors"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"log"
|
|
"math/rand"
|
|
"net"
|
|
"net/http"
|
|
"regexp"
|
|
|
|
// Allow dynamic profiling.
|
|
_ "net/http/pprof"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/nats-io/jwt/v2"
|
|
"github.com/nats-io/nkeys"
|
|
"github.com/nats-io/nuid"
|
|
|
|
"github.com/nats-io/nats-server/v2/logger"
|
|
)
|
|
|
|
const (
	// Interval for the first PING for non client connections.
	// (Routes, gateways, leaf nodes get pinged quickly after connect.)
	firstPingInterval = time.Second

	// This is for the first ping for client connections.
	firstClientPingInterval = 2 * time.Second
)
|
|
|
|
// Info is the information sent to clients, routes, gateways, and leaf nodes,
// to help them understand information about this server.
// The json tags are the wire-protocol field names of the INFO message; do
// not change them.
type Info struct {
	ID                string   `json:"server_id"`
	Name              string   `json:"server_name"`
	Version           string   `json:"version"`
	Proto             int      `json:"proto"`
	GitCommit         string   `json:"git_commit,omitempty"`
	GoVersion         string   `json:"go"`
	Host              string   `json:"host"`
	Port              int      `json:"port"`
	Headers           bool     `json:"headers"`
	AuthRequired      bool     `json:"auth_required,omitempty"`
	TLSRequired       bool     `json:"tls_required,omitempty"`
	TLSVerify         bool     `json:"tls_verify,omitempty"`
	TLSAvailable      bool     `json:"tls_available,omitempty"`
	MaxPayload        int32    `json:"max_payload"`
	JetStream         bool     `json:"jetstream,omitempty"`
	IP                string   `json:"ip,omitempty"`
	CID               uint64   `json:"client_id,omitempty"`
	ClientIP          string   `json:"client_ip,omitempty"`
	Nonce             string   `json:"nonce,omitempty"`
	Cluster           string   `json:"cluster,omitempty"`
	Dynamic           bool     `json:"cluster_dynamic,omitempty"`
	Domain            string   `json:"domain,omitempty"`
	ClientConnectURLs []string `json:"connect_urls,omitempty"`    // Contains URLs a client can connect to.
	WSConnectURLs     []string `json:"ws_connect_urls,omitempty"` // Contains URLs a ws client can connect to.
	LameDuckMode      bool     `json:"ldm,omitempty"`

	// Route Specific
	Import        *SubjectPermission `json:"import,omitempty"`
	Export        *SubjectPermission `json:"export,omitempty"`
	LNOC          bool               `json:"lnoc,omitempty"`
	InfoOnConnect bool               `json:"info_on_connect,omitempty"` // When true the server will respond to CONNECT with an INFO
	ConnectInfo   bool               `json:"connect_info,omitempty"`    // When true this is the server INFO response to CONNECT

	// Gateways Specific
	Gateway           string   `json:"gateway,omitempty"`             // Name of the origin Gateway (sent by gateway's INFO)
	GatewayURLs       []string `json:"gateway_urls,omitempty"`        // Gateway URLs in the originating cluster (sent by gateway's INFO)
	GatewayURL        string   `json:"gateway_url,omitempty"`         // Gateway URL on that server (sent by route's INFO)
	GatewayCmd        byte     `json:"gateway_cmd,omitempty"`         // Command code for the receiving server to know what to do
	GatewayCmdPayload []byte   `json:"gateway_cmd_payload,omitempty"` // Command payload when needed
	GatewayNRP        bool     `json:"gateway_nrp,omitempty"`         // Uses new $GNR. prefix for mapped replies

	// LeafNode Specific
	LeafNodeURLs  []string `json:"leafnode_urls,omitempty"`  // LeafNode URLs that the server can reconnect to.
	RemoteAccount string   `json:"remote_account,omitempty"` // Lets the other side know the remote account that they bind to.
}
|
|
|
|
// Server is our main struct.
// NOTE(review): most mutable fields appear to be guarded by mu, with
// dedicated locks for a few hot or independent areas (optsMu, cnMu, rnMu,
// grMu, varzMu, rerrMu, logging) — confirm per field before relying on it.
type Server struct {
	// gcid must stay 64-bit aligned at the top (see stats type comment);
	// presumably a global connection id counter — confirm at usage sites.
	gcid uint64
	stats
	mu                  sync.Mutex
	kp                  nkeys.KeyPair // This server's nkey identity (created in NewServer).
	prand               *rand.Rand
	info                Info // The INFO sent to clients; Host/Port finalized in setInfoHostPort.
	configFile          string
	optsMu              sync.RWMutex // Guards opts (see getOpts/setOpts).
	opts                *Options
	running             bool
	shutdown            bool
	reloading           bool
	listener            net.Listener
	listenerErr         error
	gacc                *Account // The global ($G) account, created in configureAccounts.
	sys                 *internal
	js                  *jetStream
	accounts            sync.Map
	tmpAccounts         sync.Map // Temporarily stores accounts that are being built
	activeAccounts      int32
	accResolver         AccountResolver
	clients             map[uint64]*client
	routes              map[uint64]*client
	routesByHash        sync.Map
	remotes             map[string]*client
	leafs               map[uint64]*client
	users               map[string]*User
	nkeys               map[string]*NkeyUser
	totalClients        uint64
	closed              *closedRingBuffer // Recently closed clients, bounded by MaxClosedClients.
	done                chan bool
	start               time.Time
	http                net.Listener
	httpHandler         http.Handler
	httpBasePath        string
	profiler            net.Listener
	httpReqStats        map[string]uint64
	routeListener       net.Listener
	routeListenerErr    error
	routeInfo           Info
	routeInfoJSON       []byte // Cached "INFO ..." protocol line, see generateRouteInfoJSON.
	routeResolver       netResolver
	routesToSelf        map[string]struct{}
	leafNodeListener    net.Listener
	leafNodeListenerErr error
	leafNodeInfo        Info
	leafNodeInfoJSON    []byte
	leafURLsMap         refCountedUrlSet
	leafNodeOpts        struct {
		resolver    netResolver
		dialTimeout time.Duration
	}
	leafRemoteCfgs     []*leafNodeCfg
	leafRemoteAccounts sync.Map

	quitCh           chan struct{} // Closed to kick out goroutines waiting on server shutdown.
	shutdownComplete chan struct{} // Closed when Shutdown() is complete (see WaitForShutdown).

	// Tracking Go routines
	grMu         sync.Mutex
	grTmpClients map[uint64]*client
	grRunning    bool
	grWG         sync.WaitGroup // to wait on various go routines

	cproto     int64     // number of clients supporting async INFO
	configTime time.Time // last time config was loaded

	logging struct {
		sync.RWMutex
		logger      Logger
		trace       int32
		debug       int32
		traceSysAcc int32
	}

	clientConnectURLs []string

	// Used internally for quick look-ups.
	clientConnectURLsMap refCountedUrlSet

	lastCURLsUpdate int64

	// For Gateways
	gatewayListener    net.Listener // Accept listener
	gatewayListenerErr error
	gateway            *srvGateway

	// Used by tests to check that http.Servers do
	// not set any timeout.
	monitoringServer *http.Server
	profilingServer  *http.Server

	// LameDuck mode
	ldm   bool
	ldmCh chan bool

	// Trusted public operator keys.
	trustedKeys []string
	// map of trusted keys to operator setting StrictSigningKeyUsage
	strictSigningKeyUsage map[string]struct{}

	// We use this to minimize mem copies for requests to monitoring
	// endpoint /varz (when it comes from http).
	varzMu sync.Mutex
	varz   *Varz
	// This is set during a config reload if we detect that we have
	// added/removed routes. The monitoring code then check that
	// to know if it should update the cluster's URLs array.
	varzUpdateRouteURLs bool

	// Keeps a sublist of subscriptions attached to leafnode connections
	// for the $GNR.*.*.*.> subject so that a server can send back a mapped
	// gateway reply.
	gwLeafSubs *Sublist

	// Used for expiration of mapped GW replies
	gwrm struct {
		w  int32
		ch chan time.Duration
		m  sync.Map
	}

	// For eventIDs
	eventIds *nuid.NUID

	// Websocket structure
	websocket srvWebsocket

	// MQTT structure
	mqtt srvMQTT

	// OCSP monitoring
	ocsps []*OCSPMonitor

	// exporting account name the importer experienced issues with
	incompleteAccExporterMap sync.Map

	// Holds cluster name under different lock for mapping
	cnMu sync.RWMutex
	cn   string

	// For registering raft nodes with the server.
	rnMu      sync.RWMutex
	raftNodes map[string]RaftNode

	// For mapping from a raft node name back to a server name and cluster. Node has to be in the same domain.
	nodeToInfo sync.Map

	// For out of resources to not log errors too fast.
	rerrMu   sync.Mutex
	rerrLast time.Time

	connRateCounter *rateCounter

	// If there is a system account configured, to still support the $G account,
	// the server will create a fake user and add it to the list of users.
	// Keep track of what that user name is for config reload purposes.
	sysAccOnlyNoAuthUser string

	// How often user logon fails due to the issuer account not being pinned.
	pinnedAccFail uint64

	// This is a central logger for IPQueues when the number of pending
	// messages reaches a certain threshold (per queue)
	ipqLog *srvIPQueueLogger
}
|
|
|
|
// srvIPQueueLogger is the central logger for IPQueues whose pending
// message count reaches a threshold (see the ipqLog field on Server).
type srvIPQueueLogger struct {
	ch   chan string   // incoming log notifications; producer side not visible in this chunk — confirm at usage sites
	done chan struct{} // shutdown signal for the logger goroutine — TODO confirm close vs. send semantics
	s    *Server       // back-reference to the owning server
}
|
|
|
|
// For tracking JS nodes.
// NOTE: field order is significant — NewServer populates this struct with
// an unkeyed composite literal; adding or reordering fields requires
// updating that literal.
type nodeInfo struct {
	name    string
	version string
	cluster string
	domain  string
	id      string
	cfg     *JetStreamConfig
	stats   *JetStreamStats
	offline bool
	js      bool
}
|
|
|
|
// Make sure all are 64bits for atomic use
// (on 32-bit platforms, atomic 64-bit ops require 64-bit alignment,
// which holds when all fields are int64 and the struct is placed first
// or 64-bit aligned in its container).
type stats struct {
	inMsgs        int64
	outMsgs       int64
	inBytes       int64
	outBytes      int64
	slowConsumers int64
}
|
|
|
|
// New will setup a new server struct after parsing the options.
|
|
// DEPRECATED: Use NewServer(opts)
|
|
func New(opts *Options) *Server {
|
|
s, _ := NewServer(opts)
|
|
return s
|
|
}
|
|
|
|
// NewServer will setup a new server struct after parsing the options.
// Could return an error if options can not be validated.
// The returned server retains opts; callers should not mutate them
// afterwards except through config reload.
func NewServer(opts *Options) (*Server, error) {
	setBaselineOptions(opts)

	// Process TLS options, including whether we require client certificates.
	tlsReq := opts.TLSConfig != nil
	verify := (tlsReq && opts.TLSConfig.ClientAuth == tls.RequireAndVerifyClientCert)

	// Created server's nkey identity.
	kp, _ := nkeys.CreateServer()
	pub, _ := kp.PublicKey()

	// Default the server name to the public nkey unless explicitly configured.
	serverName := pub
	if opts.ServerName != _EMPTY_ {
		serverName = opts.ServerName
	}

	httpBasePath := normalizeBasePath(opts.HTTPBasePath)

	// Validate some options. This is here because we cannot assume that
	// server will always be started with configuration parsing (that could
	// report issues). Its options can be (incorrectly) set by hand when
	// server is embedded. If there is an error, return nil.
	if err := validateOptions(opts); err != nil {
		return nil, err
	}

	info := Info{
		ID:           pub,
		Version:      VERSION,
		Proto:        PROTO,
		GitCommit:    gitCommit,
		GoVersion:    runtime.Version(),
		Name:         serverName,
		Host:         opts.Host,
		Port:         opts.Port,
		AuthRequired: false,
		TLSRequired:  tlsReq && !opts.AllowNonTLS,
		TLSVerify:    verify,
		MaxPayload:   opts.MaxPayload,
		JetStream:    opts.JetStream,
		Headers:      !opts.NoHeaderSupport,
		Cluster:      opts.Cluster.Name,
		Domain:       opts.JetStreamDomain,
	}

	// TLS configured but optional (AllowNonTLS): advertise availability only.
	if tlsReq && !info.TLSRequired {
		info.TLSAvailable = true
	}

	now := time.Now().UTC()

	s := &Server{
		kp:           kp,
		configFile:   opts.ConfigFile,
		info:         info,
		prand:        rand.New(rand.NewSource(time.Now().UnixNano())),
		opts:         opts,
		done:         make(chan bool, 1),
		start:        now,
		configTime:   now,
		gwLeafSubs:   NewSublistWithCache(),
		httpBasePath: httpBasePath,
		eventIds:     nuid.New(),
		routesToSelf: make(map[string]struct{}),
		httpReqStats: make(map[string]uint64), // Used to track HTTP requests
	}

	if opts.TLSRateLimit > 0 {
		s.connRateCounter = newRateCounter(opts.tlsConfigOpts.RateLimit)
	}

	// Trusted root operator keys.
	if !s.processTrustedKeys() {
		return nil, fmt.Errorf("Error processing trusted operator keys")
	}

	// If we have solicited leafnodes but no clustering and no clustername.
	// However we may need a stable clustername so use the server name.
	if len(opts.LeafNode.Remotes) > 0 && opts.Cluster.Port == 0 && opts.Cluster.Name == _EMPTY_ {
		opts.Cluster.Name = opts.ServerName
	}

	if opts.Cluster.Name != _EMPTY_ {
		// Also place into mapping cn with cnMu lock.
		s.cnMu.Lock()
		s.cn = opts.Cluster.Name
		s.cnMu.Unlock()
	}

	// The server lock is held for the remainder of construction (and is
	// temporarily released below where helpers need to take it themselves).
	s.mu.Lock()
	defer s.mu.Unlock()

	// Place ourselves in the JetStream nodeInfo if needed.
	// Unkeyed literal: field order must match the nodeInfo declaration.
	if opts.JetStream {
		ourNode := string(getHash(serverName))
		s.nodeToInfo.Store(ourNode, nodeInfo{
			serverName,
			VERSION,
			opts.Cluster.Name,
			opts.JetStreamDomain,
			info.ID,
			&JetStreamConfig{MaxMemory: opts.JetStreamMaxMemory, MaxStore: opts.JetStreamMaxStore},
			nil,
			false, true,
		})
	}

	s.routeResolver = opts.Cluster.resolver
	if s.routeResolver == nil {
		s.routeResolver = net.DefaultResolver
	}

	// Used internally for quick look-ups.
	s.clientConnectURLsMap = make(refCountedUrlSet)
	s.websocket.connectURLsMap = make(refCountedUrlSet)
	s.leafURLsMap = make(refCountedUrlSet)

	// Ensure that non-exported options (used in tests) are properly set.
	s.setLeafNodeNonExportedOptions()

	// Setup OCSP Stapling. This will abort server from starting if there
	// are no valid staples and OCSP policy is to Always or MustStaple.
	if err := s.enableOCSP(); err != nil {
		return nil, err
	}

	// Call this even if there is no gateway defined. It will
	// initialize the structure so we don't have to check for
	// it to be nil or not in various places in the code.
	if err := s.newGateway(opts); err != nil {
		return nil, err
	}

	// If we have a cluster definition but do not have a cluster name, create one.
	if opts.Cluster.Port != 0 && opts.Cluster.Name == _EMPTY_ {
		s.info.Cluster = nuid.Next()
	} else if opts.Cluster.Name != _EMPTY_ {
		// Likewise here if we have a cluster name set.
		s.info.Cluster = opts.Cluster.Name
	}

	// This is normally done in the AcceptLoop, once the
	// listener has been created (possibly with random port),
	// but since some tests may expect the INFO to be properly
	// set after New(), let's do it now.
	s.setInfoHostPort()

	// For tracking clients
	s.clients = make(map[uint64]*client)

	// For tracking closed clients.
	s.closed = newClosedRingBuffer(opts.MaxClosedClients)

	// For tracking connections that are not yet registered
	// in s.routes, but for which readLoop has started.
	s.grTmpClients = make(map[uint64]*client)

	// For tracking routes and their remote ids
	s.routes = make(map[uint64]*client)
	s.remotes = make(map[string]*client)

	// For tracking leaf nodes.
	s.leafs = make(map[uint64]*client)

	// Used to kick out all go routines possibly waiting on server
	// to shutdown.
	s.quitCh = make(chan struct{})
	// Closed when Shutdown() is complete. Allows WaitForShutdown() to block
	// waiting for complete shutdown.
	s.shutdownComplete = make(chan struct{})

	// Check for configured account resolvers.
	if err := s.configureResolver(); err != nil {
		return nil, err
	}
	// If there is an URL account resolver, do basic test to see if anyone is home.
	if ar := opts.AccountResolver; ar != nil {
		if ur, ok := ar.(*URLAccResolver); ok {
			if _, err := ur.Fetch(""); err != nil {
				return nil, err
			}
		}
	}
	// For other resolver:
	// In operator mode, when the account resolver depends on an external system and
	// the system account can't be fetched, inject a temporary one.
	if ar := s.accResolver; len(opts.TrustedOperators) == 1 && ar != nil &&
		opts.SystemAccount != _EMPTY_ && opts.SystemAccount != DEFAULT_SYSTEM_ACCOUNT {
		if _, ok := ar.(*MemAccResolver); !ok {
			// Release the server lock: fetchAccount/lookupAccount may acquire it.
			s.mu.Unlock()
			var a *Account
			// perform direct lookup to avoid warning trace
			if _, err := fetchAccount(ar, s.opts.SystemAccount); err == nil {
				a, _ = s.lookupAccount(s.opts.SystemAccount)
			}
			s.mu.Lock()
			if a == nil {
				// Not resolvable yet: register a placeholder system account
				// signed by the single trusted operator.
				sac := NewAccount(s.opts.SystemAccount)
				sac.Issuer = opts.TrustedOperators[0].Issuer
				sac.signingKeys = map[string]jwt.Scope{}
				sac.signingKeys[s.opts.SystemAccount] = nil
				s.registerAccountNoLock(sac)
			}
		}
	}

	// For tracking accounts
	if err := s.configureAccounts(); err != nil {
		return nil, err
	}

	// Used to setup Authorization.
	s.configureAuthorization()

	// Start signal handler
	s.handleSignals()

	return s, nil
}
|
|
|
|
var semVerRe = regexp.MustCompile(`\Av?([0-9]+)\.?([0-9]+)?\.?([0-9]+)?`)

// versionComponents parses a "v1.2.3"-style version string into its numeric
// major/minor/patch parts. A string that does not match the semver pattern
// yields (0, 0, 0, err); a match whose captured groups fail to parse as
// integers (e.g. a missing minor or patch component) yields (-1, -1, -1, err).
func versionComponents(version string) (major, minor, patch int, err error) {
	parts := semVerRe.FindStringSubmatch(version)
	if parts == nil {
		return 0, 0, 0, errors.New("invalid semver")
	}
	if major, err = strconv.Atoi(parts[1]); err != nil {
		return -1, -1, -1, err
	}
	if minor, err = strconv.Atoi(parts[2]); err != nil {
		return -1, -1, -1, err
	}
	if patch, err = strconv.Atoi(parts[3]); err != nil {
		return -1, -1, -1, err
	}
	return major, minor, patch, nil
}
|
|
|
|
func versionAtLeast(version string, emajor, eminor, epatch int) bool {
|
|
major, minor, patch, err := versionComponents(version)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
if major < emajor || minor < eminor || patch < epatch {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (s *Server) logRejectedTLSConns() {
|
|
defer s.grWG.Done()
|
|
t := time.NewTicker(time.Second)
|
|
defer t.Stop()
|
|
for {
|
|
select {
|
|
case <-s.quitCh:
|
|
return
|
|
case <-t.C:
|
|
blocked := s.connRateCounter.countBlocked()
|
|
if blocked > 0 {
|
|
s.Warnf("Rejected %d connections due to TLS rate limiting", blocked)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// clusterName returns our cluster name which could be dynamic.
|
|
func (s *Server) ClusterName() string {
|
|
s.mu.Lock()
|
|
cn := s.info.Cluster
|
|
s.mu.Unlock()
|
|
return cn
|
|
}
|
|
|
|
// Grabs cluster name with cluster name specific lock.
|
|
func (s *Server) cachedClusterName() string {
|
|
s.cnMu.RLock()
|
|
cn := s.cn
|
|
s.cnMu.RUnlock()
|
|
return cn
|
|
}
|
|
|
|
// setClusterName will update the cluster name for this server.
// It updates both the client INFO and route INFO (regenerating the cached
// route INFO JSON), mirrors the name into the cnMu-protected cache, closes
// solicited leaf node connections (which must reconnect under the new
// name), and signals the system reset channel if present.
func (s *Server) setClusterName(name string) {
	s.mu.Lock()
	var resetCh chan struct{}
	if s.sys != nil && s.info.Cluster != name {
		// can't hold the lock as go routine reading it may be waiting for lock as well
		resetCh = s.sys.resetCh
	}
	s.info.Cluster = name
	s.routeInfo.Cluster = name

	// Regenerate the info byte array
	s.generateRouteInfoJSON()
	// Need to close solicited leaf nodes. The close has to be done outside of the server lock.
	var leafs []*client
	for _, c := range s.leafs {
		c.mu.Lock()
		if c.leaf != nil && c.leaf.remote != nil {
			leafs = append(leafs, c)
		}
		c.mu.Unlock()
	}
	s.mu.Unlock()

	// Also place into mapping cn with cnMu lock.
	s.cnMu.Lock()
	s.cn = name
	s.cnMu.Unlock()

	// Close connections outside of the server lock (see above).
	for _, l := range leafs {
		l.closeConnection(ClusterNameConflict)
	}
	// Signal after releasing the server lock (see resetCh note above).
	if resetCh != nil {
		resetCh <- struct{}{}
	}
	s.Noticef("Cluster name updated to %s", name)
}
|
|
|
|
// Return whether the cluster name is dynamic.
|
|
func (s *Server) isClusterNameDynamic() bool {
|
|
return s.getOpts().Cluster.Name == _EMPTY_
|
|
}
|
|
|
|
// Returns our configured serverName.
|
|
func (s *Server) serverName() string {
|
|
return s.getOpts().ServerName
|
|
}
|
|
|
|
// ClientURL returns the URL used to connect clients. Helpful in testing
|
|
// when we designate a random client port (-1).
|
|
func (s *Server) ClientURL() string {
|
|
// FIXME(dlc) - should we add in user and pass if defined single?
|
|
opts := s.getOpts()
|
|
scheme := "nats://"
|
|
if opts.TLSConfig != nil {
|
|
scheme = "tls://"
|
|
}
|
|
return fmt.Sprintf("%s%s:%d", scheme, opts.Host, opts.Port)
|
|
}
|
|
|
|
func validateCluster(o *Options) error {
|
|
if err := validatePinnedCerts(o.Cluster.TLSPinnedCerts); err != nil {
|
|
return fmt.Errorf("cluster: %v", err)
|
|
}
|
|
// Check that cluster name if defined matches any gateway name.
|
|
if o.Gateway.Name != "" && o.Gateway.Name != o.Cluster.Name {
|
|
if o.Cluster.Name != "" {
|
|
return ErrClusterNameConfigConflict
|
|
}
|
|
// Set this here so we do not consider it dynamic.
|
|
o.Cluster.Name = o.Gateway.Name
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validatePinnedCerts(pinned PinnedCertSet) error {
|
|
re := regexp.MustCompile("^[a-f0-9]{64}$")
|
|
for certId := range pinned {
|
|
entry := strings.ToLower(certId)
|
|
if !re.MatchString(entry) {
|
|
return fmt.Errorf("error parsing 'pinned_certs' key %s does not look like lower case hex-encoded sha256 of DER encoded SubjectPublicKeyInfo", entry)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// validateOptions performs cross-field validation of server options. It is
// called from NewServer because options may be set by hand (embedded
// server) without going through config-file parsing. Returns the first
// validation error encountered.
func validateOptions(o *Options) error {
	// Lame duck grace period must leave time for the actual drain.
	if o.LameDuckDuration > 0 && o.LameDuckGracePeriod >= o.LameDuckDuration {
		return fmt.Errorf("lame duck grace period (%v) should be strictly lower than lame duck duration (%v)",
			o.LameDuckGracePeriod, o.LameDuckDuration)
	}
	// A single message must fit within the pending buffer.
	if int64(o.MaxPayload) > o.MaxPending {
		return fmt.Errorf("max_payload (%v) cannot be higher than max_pending (%v)",
			o.MaxPayload, o.MaxPending)
	}
	// Check that the trust configuration is correct.
	if err := validateTrustedOperators(o); err != nil {
		return err
	}
	// Check on leaf nodes which will require a system
	// account when gateways are also configured.
	if err := validateLeafNode(o); err != nil {
		return err
	}
	// Check that authentication is properly configured.
	if err := validateAuth(o); err != nil {
		return err
	}
	// Check that gateway is properly configured. Returns no error
	// if there is no gateway defined.
	if err := validateGatewayOptions(o); err != nil {
		return err
	}
	// Check that cluster name if defined matches any gateway name.
	if err := validateCluster(o); err != nil {
		return err
	}
	if err := validateMQTTOptions(o); err != nil {
		return err
	}
	if err := validateJetStreamOptions(o); err != nil {
		return err
	}
	// Finally check websocket options.
	return validateWebsocketOptions(o)
}
|
|
|
|
func (s *Server) getOpts() *Options {
|
|
s.optsMu.RLock()
|
|
opts := s.opts
|
|
s.optsMu.RUnlock()
|
|
return opts
|
|
}
|
|
|
|
func (s *Server) setOpts(opts *Options) {
|
|
s.optsMu.Lock()
|
|
s.opts = opts
|
|
s.optsMu.Unlock()
|
|
}
|
|
|
|
func (s *Server) globalAccount() *Account {
|
|
s.mu.Lock()
|
|
gacc := s.gacc
|
|
s.mu.Unlock()
|
|
return gacc
|
|
}
|
|
|
|
// Used to setup Accounts.
// Lock is held upon entry (and is temporarily released around calls that
// acquire it themselves — see lookupAccount/addSystemAccountExports below).
func (s *Server) configureAccounts() error {
	// Create the global account.
	if s.gacc == nil {
		s.gacc = NewAccount(globalAccountName)
		s.registerAccountNoLock(s.gacc)
	}

	opts := s.opts

	// Check opts and walk through them. We need to copy them here
	// so that we do not keep a real one sitting in the options.
	for _, acc := range s.opts.Accounts {
		var a *Account
		if acc.Name == globalAccountName {
			a = s.gacc
		} else {
			a = acc.shallowCopy()
		}
		if acc.hasMappings() {
			// For now just move and wipe from opts.Accounts version.
			a.mappings = acc.mappings
			acc.mappings = nil
			// We use this for selecting between multiple weighted destinations.
			a.prand = rand.New(rand.NewSource(time.Now().UnixNano()))
		}
		// Strip runtime state from the copy kept in options.
		acc.sl = nil
		acc.clients = nil
		s.registerAccountNoLock(a)

		// If we see an account defined using $SYS we will make sure that is set as system account.
		if acc.Name == DEFAULT_SYSTEM_ACCOUNT && opts.SystemAccount == _EMPTY_ {
			s.opts.SystemAccount = DEFAULT_SYSTEM_ACCOUNT
		}
	}

	// Now that we have this we need to remap any referenced accounts in
	// import or export maps to the new ones.
	swapApproved := func(ea *exportAuth) {
		for sub, a := range ea.approved {
			var acc *Account
			if v, ok := s.accounts.Load(a.Name); ok {
				acc = v.(*Account)
			}
			ea.approved[sub] = acc
		}
	}
	var numAccounts int
	s.accounts.Range(func(k, v interface{}) bool {
		numAccounts++
		acc := v.(*Account)
		// Exports
		for _, se := range acc.exports.streams {
			if se != nil {
				swapApproved(&se.exportAuth)
			}
		}
		for _, se := range acc.exports.services {
			if se != nil {
				// Swap over the bound account for service exports.
				if se.acc != nil {
					if v, ok := s.accounts.Load(se.acc.Name); ok {
						se.acc = v.(*Account)
					}
				}
				swapApproved(&se.exportAuth)
			}
		}
		// Imports
		for _, si := range acc.imports.streams {
			if v, ok := s.accounts.Load(si.acc.Name); ok {
				si.acc = v.(*Account)
			}
		}
		for _, si := range acc.imports.services {
			if v, ok := s.accounts.Load(si.acc.Name); ok {
				si.acc = v.(*Account)
				si.se = si.acc.getServiceExport(si.to)
			}
		}
		// Make sure the subs are running, but only if not reloading.
		if len(acc.imports.services) > 0 && acc.ic == nil && !s.reloading {
			acc.ic = s.createInternalAccountClient()
			acc.ic.acc = acc
			acc.addAllServiceImportSubs()
		}
		acc.updated = time.Now().UTC()
		return true
	})

	// Set the system account if it was configured.
	// Otherwise create a default one.
	if opts.SystemAccount != _EMPTY_ {
		// Lock may be acquired in lookupAccount, so release to call lookupAccount.
		s.mu.Unlock()
		acc, err := s.lookupAccount(opts.SystemAccount)
		s.mu.Lock()
		if err == nil && s.sys != nil && acc != s.sys.account {
			// sys.account.clients (including internal client)/respmap/etc... are transferred separately
			s.sys.account = acc
			s.mu.Unlock()
			// acquires server lock separately
			s.addSystemAccountExports(acc)
			s.mu.Lock()
		}
		if err != nil {
			return fmt.Errorf("error resolving system account: %v", err)
		}

		// If we have defined a system account here check to see if its just us and the $G account.
		// We would do this to add user/pass to the system account. If this is the case add in
		// no-auth-user for $G.
		if numAccounts == 2 && s.opts.NoAuthUser == _EMPTY_ {
			// If we come here from config reload, let's not recreate the fake user name otherwise
			// it will cause currently clients to be disconnected.
			uname := s.sysAccOnlyNoAuthUser
			if uname == _EMPTY_ {
				// Create a unique name so we do not collide.
				var b [8]byte
				rn := rand.Int63()
				for i, l := 0, rn; i < len(b); i++ {
					b[i] = digits[l%base]
					l /= base
				}
				uname = fmt.Sprintf("nats-%s", b[:])
				s.sysAccOnlyNoAuthUser = uname
			}
			// Password is the random suffix of the generated "nats-XXXXXXXX" name.
			s.opts.Users = append(s.opts.Users, &User{Username: uname, Password: uname[6:], Account: s.gacc})
			s.opts.NoAuthUser = uname
		}
	}

	return nil
}
|
|
|
|
// Setup the account resolver. For memory resolver, make sure the JWTs are
// properly formed but do not enforce expiration etc.
// Returns an error if preloads are given for a read-only resolver or if a
// preloaded JWT does not decode.
func (s *Server) configureResolver() error {
	opts := s.getOpts()
	s.accResolver = opts.AccountResolver
	if opts.AccountResolver != nil {
		// For URL resolver, set the TLSConfig if specified.
		if opts.AccountResolverTLSConfig != nil {
			if ar, ok := opts.AccountResolver.(*URLAccResolver); ok {
				if t, ok := ar.c.Transport.(*http.Transport); ok {
					// Drop connections established under the previous TLS config.
					t.CloseIdleConnections()
					t.TLSClientConfig = opts.AccountResolverTLSConfig.Clone()
				}
			}
		}
		if len(opts.resolverPreloads) > 0 {
			if s.accResolver.IsReadOnly() {
				return fmt.Errorf("resolver preloads only available for writeable resolver types MEM/DIR/CACHE_DIR")
			}
			for k, v := range opts.resolverPreloads {
				// Decode only to validate the JWT's form; claims are not enforced here.
				_, err := jwt.DecodeAccountClaims(v)
				if err != nil {
					return fmt.Errorf("preload account error for %q: %v", k, err)
				}
				s.accResolver.Store(k, v)
			}
		}
	}
	return nil
}
|
|
|
|
// This will check preloads for validation issues.
|
|
func (s *Server) checkResolvePreloads() {
|
|
opts := s.getOpts()
|
|
// We can just check the read-only opts versions here, that way we do not need
|
|
// to grab server lock or access s.accResolver.
|
|
for k, v := range opts.resolverPreloads {
|
|
claims, err := jwt.DecodeAccountClaims(v)
|
|
if err != nil {
|
|
s.Errorf("Preloaded account [%s] not valid", k)
|
|
continue
|
|
}
|
|
// Check if it is expired.
|
|
vr := jwt.CreateValidationResults()
|
|
claims.Validate(vr)
|
|
if vr.IsBlocking(true) {
|
|
s.Warnf("Account [%s] has validation issues:", k)
|
|
for _, v := range vr.Issues {
|
|
s.Warnf(" - %s", v.Description)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Server) generateRouteInfoJSON() {
|
|
b, _ := json.Marshal(s.routeInfo)
|
|
pcs := [][]byte{[]byte("INFO"), b, []byte(CR_LF)}
|
|
s.routeInfoJSON = bytes.Join(pcs, []byte(" "))
|
|
}
|
|
|
|
// Determines if we are in pre NATS 2.0 setup with no accounts.
|
|
func (s *Server) globalAccountOnly() bool {
|
|
var hasOthers bool
|
|
|
|
if s.trustedKeys != nil {
|
|
return false
|
|
}
|
|
|
|
s.mu.Lock()
|
|
s.accounts.Range(func(k, v interface{}) bool {
|
|
acc := v.(*Account)
|
|
// Ignore global and system
|
|
if acc == s.gacc || (s.sys != nil && acc == s.sys.account) {
|
|
return true
|
|
}
|
|
hasOthers = true
|
|
return false
|
|
})
|
|
s.mu.Unlock()
|
|
|
|
return !hasOthers
|
|
}
|
|
|
|
// Determines if this server is in standalone mode, meaning no routes or gateways.
|
|
func (s *Server) standAloneMode() bool {
|
|
opts := s.getOpts()
|
|
return opts.Cluster.Port == 0 && opts.Gateway.Port == 0
|
|
}
|
|
|
|
func (s *Server) configuredRoutes() int {
|
|
return len(s.getOpts().Routes)
|
|
}
|
|
|
|
// activePeers is used in bootstrapping raft groups like the JetStream meta controller.
|
|
func (s *Server) ActivePeers() (peers []string) {
|
|
s.nodeToInfo.Range(func(k, v interface{}) bool {
|
|
si := v.(nodeInfo)
|
|
if !si.offline {
|
|
peers = append(peers, k.(string))
|
|
}
|
|
return true
|
|
})
|
|
return peers
|
|
}
|
|
|
|
// isTrustedIssuer will check that the issuer is a trusted public key.
|
|
// This is used to make sure an account was signed by a trusted operator.
|
|
func (s *Server) isTrustedIssuer(issuer string) bool {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
// If we are not running in trusted mode and there is no issuer, that is ok.
|
|
if s.trustedKeys == nil && issuer == "" {
|
|
return true
|
|
}
|
|
for _, tk := range s.trustedKeys {
|
|
if tk == issuer {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// processTrustedKeys will process binary stamped and
// options-based trusted nkeys. Returns success.
// Build-stamped keys (the package-level trustedKeys string) take
// precedence; initStampedTrustedKeys fails if options also set keys.
func (s *Server) processTrustedKeys() bool {
	s.strictSigningKeyUsage = map[string]struct{}{}
	if trustedKeys != "" && !s.initStampedTrustedKeys() {
		return false
	} else if s.opts.TrustedKeys != nil {
		// Validate every options-provided key before accepting the set.
		for _, key := range s.opts.TrustedKeys {
			if !nkeys.IsValidPublicOperatorKey(key) {
				return false
			}
		}
		// Copy so later mutations of the options slice do not affect us.
		s.trustedKeys = append([]string(nil), s.opts.TrustedKeys...)
		// Record signing keys of operators that enforce StrictSigningKeyUsage.
		for _, claim := range s.opts.TrustedOperators {
			if !claim.StrictSigningKeyUsage {
				continue
			}
			for _, key := range claim.SigningKeys {
				s.strictSigningKeyUsage[key] = struct{}{}
			}
		}
	}
	return true
}
|
|
|
|
// checkTrustedKeyString will check that the string is a valid array
|
|
// of public operator nkeys.
|
|
func checkTrustedKeyString(keys string) []string {
|
|
tks := strings.Fields(keys)
|
|
if len(tks) == 0 {
|
|
return nil
|
|
}
|
|
// Walk all the keys and make sure they are valid.
|
|
for _, key := range tks {
|
|
if !nkeys.IsValidPublicOperatorKey(key) {
|
|
return nil
|
|
}
|
|
}
|
|
return tks
|
|
}
|
|
|
|
// initStampedTrustedKeys will check the stamped trusted keys
|
|
// and will set the server field 'trustedKeys'. Returns whether
|
|
// it succeeded or not.
|
|
func (s *Server) initStampedTrustedKeys() bool {
|
|
// Check to see if we have an override in options, which will cause us to fail.
|
|
if len(s.opts.TrustedKeys) > 0 {
|
|
return false
|
|
}
|
|
tks := checkTrustedKeyString(trustedKeys)
|
|
if len(tks) == 0 {
|
|
return false
|
|
}
|
|
s.trustedKeys = tks
|
|
return true
|
|
}
|
|
|
|
// PrintAndDie is exported for access in other packages.
// It writes msg to stderr and terminates the process with exit code 1.
func PrintAndDie(msg string) {
	fmt.Fprintf(os.Stderr, "%s\n", msg)
	os.Exit(1)
}
|
|
|
|
// PrintServerAndExit will print our version and exit.
|
|
func PrintServerAndExit() {
|
|
fmt.Printf("nats-server: v%s\n", VERSION)
|
|
os.Exit(0)
|
|
}
|
|
|
|
// ProcessCommandLineArgs takes the command line arguments
|
|
// validating and setting flags for handling in case any
|
|
// sub command was present.
|
|
func ProcessCommandLineArgs(cmd *flag.FlagSet) (showVersion bool, showHelp bool, err error) {
|
|
if len(cmd.Args()) > 0 {
|
|
arg := cmd.Args()[0]
|
|
switch strings.ToLower(arg) {
|
|
case "version":
|
|
return true, false, nil
|
|
case "help":
|
|
return false, true, nil
|
|
default:
|
|
return false, false, fmt.Errorf("unrecognized command: %q", arg)
|
|
}
|
|
}
|
|
|
|
return false, false, nil
|
|
}
|
|
|
|
// Running reports whether the server is currently running.
// Public, lock-protected version of isRunning.
func (s *Server) Running() bool {
	return s.isRunning()
}
|
|
|
|
// Protected check on running state
|
|
func (s *Server) isRunning() bool {
|
|
s.mu.Lock()
|
|
running := s.running
|
|
s.mu.Unlock()
|
|
return running
|
|
}
|
|
|
|
func (s *Server) logPid() error {
|
|
pidStr := strconv.Itoa(os.Getpid())
|
|
return ioutil.WriteFile(s.getOpts().PidFile, []byte(pidStr), 0660)
|
|
}
|
|
|
|
// numReservedAccounts will return the number of reserved accounts configured in the server.
// Currently this is 1, one for the global default account.
func (s *Server) numReservedAccounts() int {
	return 1
}
|
|
|
|
// NumActiveAccounts reports number of active accounts on this server.
// Reads the counter atomically; safe for concurrent use.
func (s *Server) NumActiveAccounts() int32 {
	return atomic.LoadInt32(&s.activeAccounts)
}
|
|
|
|
// incActiveAccounts atomically increments the active account counter.
// (No lock is needed; the counter is maintained with atomics.)
func (s *Server) incActiveAccounts() {
	atomic.AddInt32(&s.activeAccounts, 1)
}
|
|
|
|
// decActiveAccounts atomically decrements the active account counter.
// (No lock is needed; the counter is maintained with atomics.)
func (s *Server) decActiveAccounts() {
	atomic.AddInt32(&s.activeAccounts, -1)
}
|
|
|
|
// This should be used for testing only. Will be slow since we have to
|
|
// range over all accounts in the sync.Map to count.
|
|
func (s *Server) numAccounts() int {
|
|
count := 0
|
|
s.mu.Lock()
|
|
s.accounts.Range(func(k, v interface{}) bool {
|
|
count++
|
|
return true
|
|
})
|
|
s.mu.Unlock()
|
|
return count
|
|
}
|
|
|
|
// NumLoadedAccounts returns the number of loaded accounts.
// Delegates to numAccounts, which ranges over all accounts and is slow.
func (s *Server) NumLoadedAccounts() int {
	return s.numAccounts()
}
|
|
|
|
// LookupOrRegisterAccount will return the given account if known or create a new entry.
|
|
func (s *Server) LookupOrRegisterAccount(name string) (account *Account, isNew bool) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if v, ok := s.accounts.Load(name); ok {
|
|
return v.(*Account), false
|
|
}
|
|
acc := NewAccount(name)
|
|
s.registerAccountNoLock(acc)
|
|
return acc, true
|
|
}
|
|
|
|
// RegisterAccount will register an account. The account must be new
|
|
// or this call will fail.
|
|
func (s *Server) RegisterAccount(name string) (*Account, error) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if _, ok := s.accounts.Load(name); ok {
|
|
return nil, ErrAccountExists
|
|
}
|
|
acc := NewAccount(name)
|
|
s.registerAccountNoLock(acc)
|
|
return acc, nil
|
|
}
|
|
|
|
// SetSystemAccount will set the internal system account.
// If root operators are present it will also check validity.
// When the account is not known locally it is fetched via the resolver,
// built, registered and then installed as the system account.
func (s *Server) SetSystemAccount(accName string) error {
	// Lookup from sync.Map first.
	if v, ok := s.accounts.Load(accName); ok {
		return s.setSystemAccount(v.(*Account))
	}

	// If we are here we do not have local knowledge of this account.
	// Do this one by hand to return more useful error.
	ac, jwt, err := s.fetchAccountClaims(accName)
	if err != nil {
		return err
	}
	acc := s.buildInternalAccount(ac)
	acc.claimJWT = jwt
	// Due to race, we need to make sure that we are not
	// registering twice. A non-nil return means another caller won the
	// registration race; in that case the system account is (being) set
	// from the other path, so we can just return success here.
	if racc := s.registerAccount(acc); racc != nil {
		return nil
	}
	return s.setSystemAccount(acc)
}
|
|
|
|
// SystemAccount returns the system account if set.
|
|
func (s *Server) SystemAccount() *Account {
|
|
var sacc *Account
|
|
s.mu.Lock()
|
|
if s.sys != nil {
|
|
sacc = s.sys.account
|
|
}
|
|
s.mu.Unlock()
|
|
return sacc
|
|
}
|
|
|
|
// GlobalAccount returns the global account.
|
|
// Default clients will use the global account.
|
|
func (s *Server) GlobalAccount() *Account {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return s.gacc
|
|
}
|
|
|
|
// SetDefaultSystemAccount will create a default system account if one is not present.
|
|
func (s *Server) SetDefaultSystemAccount() error {
|
|
if _, isNew := s.LookupOrRegisterAccount(DEFAULT_SYSTEM_ACCOUNT); !isNew {
|
|
return nil
|
|
}
|
|
s.Debugf("Created system account: %q", DEFAULT_SYSTEM_ACCOUNT)
|
|
return s.SetSystemAccount(DEFAULT_SYSTEM_ACCOUNT)
|
|
}
|
|
|
|
// Assign a system account. Should only be called once.
// This sets up a server to send and receive messages from
// inside the server itself.
//
// Validates the account (non-nil, not expired, trusted issuer), builds
// the internal eventing state (s.sys), then starts the internal send
// loop, event tracking, remote-server sweeping and statsz timers.
// Returns ErrAccountExists if a system account was already assigned.
func (s *Server) setSystemAccount(acc *Account) error {
	if acc == nil {
		return ErrMissingAccount
	}
	// Don't try to fix this here.
	if acc.IsExpired() {
		return ErrAccountExpired
	}
	// If we are running with trusted keys for an operator
	// make sure we check the account is legit.
	if !s.isTrustedIssuer(acc.Issuer) {
		return ErrAccountValidation
	}

	s.mu.Lock()

	// Only one system account may ever be installed.
	if s.sys != nil {
		s.mu.Unlock()
		return ErrAccountExists
	}

	// This is here in an attempt to quiet the race detector and not have to place
	// locks on fast path for inbound messages and checking service imports.
	acc.mu.Lock()
	if acc.imports.services == nil {
		acc.imports.services = make(map[string]*serviceImport)
	}
	acc.mu.Unlock()

	s.sys = &internal{
		account: acc,
		client:  s.createInternalSystemClient(),
		seq:     1,
		sid:     1,
		servers: make(map[string]*serverUpdate),
		replies: make(map[string]msgHandler),
		sendq:   newIPQueue(ipQueue_Logger("System send", s.ipqLog)), // of *pubMsg
		resetCh: make(chan struct{}),
		sq:      s.newSendQ(),
		statsz:  eventsHBInterval,
		orphMax: 5 * eventsHBInterval,
		chkOrph: 3 * eventsHBInterval,
	}

	s.sys.wg.Add(1)
	s.mu.Unlock()

	// Register with the account. Must be done without holding the server lock.
	s.sys.client.registerWithAccount(acc)

	s.addSystemAccountExports(acc)

	// Start our internal loop to serialize outbound messages.
	// We do our own wg here since we will stop first during shutdown.
	go s.internalSendLoop(&s.sys.wg)

	// Start up our general subscriptions
	s.initEventTracking()

	// Track for dead remote servers.
	s.wrapChk(s.startRemoteServerSweepTimer)()

	// Send out statsz updates periodically.
	s.wrapChk(s.startStatszTimer)()

	// If we have existing accounts make sure we enable account tracking.
	s.mu.Lock()
	s.accounts.Range(func(k, v interface{}) bool {
		acc := v.(*Account)
		s.enableAccountTracking(acc)
		return true
	})
	s.mu.Unlock()

	return nil
}
|
|
|
|
// createInternalSystemClient creates an internal client of kind SYSTEM.
func (s *Server) createInternalSystemClient() *client {
	return s.createInternalClient(SYSTEM)
}
|
|
|
|
// createInternalJetStreamClient creates an internal client of kind JETSTREAM.
func (s *Server) createInternalJetStreamClient() *client {
	return s.createInternalClient(JETSTREAM)
}
|
|
|
|
// createInternalAccountClient creates an internal client of kind ACCOUNT.
func (s *Server) createInternalAccountClient() *client {
	return s.createInternalClient(ACCOUNT)
}
|
|
|
|
// Internal clients. kind should be SYSTEM or JETSTREAM
|
|
func (s *Server) createInternalClient(kind int) *client {
|
|
if kind != SYSTEM && kind != JETSTREAM && kind != ACCOUNT {
|
|
return nil
|
|
}
|
|
now := time.Now().UTC()
|
|
c := &client{srv: s, kind: kind, opts: internalOpts, msubs: -1, mpay: -1, start: now, last: now}
|
|
c.initClient()
|
|
c.echo = false
|
|
c.headers = true
|
|
c.flags.set(noReconnect)
|
|
return c
|
|
}
|
|
|
|
// Determine if accounts should track subscriptions for
|
|
// efficient propagation.
|
|
// Lock should be held on entry.
|
|
func (s *Server) shouldTrackSubscriptions() bool {
|
|
return (s.opts.Cluster.Port != 0 || s.opts.Gateway.Port != 0)
|
|
}
|
|
|
|
// Invokes registerAccountNoLock under the protection of the server lock.
|
|
// That is, server lock is acquired/released in this function.
|
|
// See registerAccountNoLock for comment on returned value.
|
|
func (s *Server) registerAccount(acc *Account) *Account {
|
|
s.mu.Lock()
|
|
racc := s.registerAccountNoLock(acc)
|
|
s.mu.Unlock()
|
|
return racc
|
|
}
|
|
|
|
// Helper to set the sublist based on preferences.
|
|
func (s *Server) setAccountSublist(acc *Account) {
|
|
if acc != nil && acc.sl == nil {
|
|
opts := s.getOpts()
|
|
if opts != nil && opts.NoSublistCache {
|
|
acc.sl = NewSublistNoCache()
|
|
} else {
|
|
acc.sl = NewSublistWithCache()
|
|
}
|
|
}
|
|
}
|
|
|
|
// Registers an account in the server.
// Due to some locking considerations, we may end-up trying
// to register the same account twice. This function will
// then return the already registered account; nil is returned when the
// given account was stored.
// Lock should be held on entry. NOTE: the server lock is temporarily
// released near the end (around registerSystemImports) and reacquired
// before returning.
func (s *Server) registerAccountNoLock(acc *Account) *Account {
	// We are under the server lock. Lookup from map, if present
	// return existing account.
	if a, _ := s.accounts.Load(acc.Name); a != nil {
		s.tmpAccounts.Delete(acc.Name)
		return a.(*Account)
	}
	// Finish account setup and store.
	s.setAccountSublist(acc)

	acc.mu.Lock()
	if acc.clients == nil {
		acc.clients = make(map[*client]struct{})
	}

	// If we are capable of routing we will track subscription
	// information for efficient interest propagation.
	// During config reload, it is possible that account was
	// already created (global account), so use locking and
	// make sure we create only if needed.
	// TODO(dlc)- Double check that we need this for GWs.
	if acc.rm == nil && s.opts != nil && s.shouldTrackSubscriptions() {
		acc.rm = make(map[string]int32)
		acc.lqws = make(map[string]int32)
	}
	acc.srv = s
	acc.updated = time.Now().UTC()
	// Snapshot values needed after releasing the account lock.
	accName := acc.Name
	jsEnabled := acc.jsLimits != nil
	acc.mu.Unlock()

	// Apply a default JetStream domain mapping if configured for this account.
	if opts := s.getOpts(); opts != nil && len(opts.JsAccDefaultDomain) > 0 {
		if defDomain, ok := opts.JsAccDefaultDomain[accName]; ok {
			if jsEnabled {
				s.Warnf("Skipping Default Domain %q, set for JetStream enabled account %q", defDomain, accName)
			} else if defDomain != _EMPTY_ {
				dest := fmt.Sprintf(jsDomainAPI, defDomain)
				s.Noticef("Adding default domain mapping %q -> %q to account %q %p", jsAllAPI, dest, accName, acc)
				if err := acc.AddMapping(jsAllAPI, dest); err != nil {
					s.Errorf("Error adding JetStream default domain mapping: %v", err)
				}
			}
		}
	}

	s.accounts.Store(acc.Name, acc)
	s.tmpAccounts.Delete(acc.Name)
	s.enableAccountTracking(acc)

	// Can not have server lock here.
	s.mu.Unlock()
	s.registerSystemImports(acc)
	s.mu.Lock()

	return nil
}
|
|
|
|
// lookupAccount is a function to return the account structure
|
|
// associated with an account name.
|
|
// Lock MUST NOT be held upon entry.
|
|
func (s *Server) lookupAccount(name string) (*Account, error) {
|
|
var acc *Account
|
|
if v, ok := s.accounts.Load(name); ok {
|
|
acc = v.(*Account)
|
|
}
|
|
if acc != nil {
|
|
// If we are expired and we have a resolver, then
|
|
// return the latest information from the resolver.
|
|
if acc.IsExpired() {
|
|
s.Debugf("Requested account [%s] has expired", name)
|
|
if s.AccountResolver() != nil {
|
|
if err := s.updateAccount(acc); err != nil {
|
|
// This error could mask expired, so just return expired here.
|
|
return nil, ErrAccountExpired
|
|
}
|
|
} else {
|
|
return nil, ErrAccountExpired
|
|
}
|
|
}
|
|
return acc, nil
|
|
}
|
|
// If we have a resolver see if it can fetch the account.
|
|
if s.AccountResolver() == nil {
|
|
return nil, ErrMissingAccount
|
|
}
|
|
return s.fetchAccount(name)
|
|
}
|
|
|
|
// LookupAccount is a public function to return the account structure
// associated with name. See lookupAccount for the resolver/expiration
// semantics.
func (s *Server) LookupAccount(name string) (*Account, error) {
	return s.lookupAccount(name)
}
|
|
|
|
// This will fetch new claims and if found update the account with new claims.
|
|
// Lock MUST NOT be held upon entry.
|
|
func (s *Server) updateAccount(acc *Account) error {
|
|
// TODO(dlc) - Make configurable
|
|
if !acc.incomplete && time.Since(acc.updated) < time.Second {
|
|
s.Debugf("Requested account update for [%s] ignored, too soon", acc.Name)
|
|
return ErrAccountResolverUpdateTooSoon
|
|
}
|
|
claimJWT, err := s.fetchRawAccountClaims(acc.Name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return s.updateAccountWithClaimJWT(acc, claimJWT)
|
|
}
|
|
|
|
// updateAccountWithClaimJWT will check and apply the claim update.
|
|
// Lock MUST NOT be held upon entry.
|
|
func (s *Server) updateAccountWithClaimJWT(acc *Account, claimJWT string) error {
|
|
if acc == nil {
|
|
return ErrMissingAccount
|
|
}
|
|
acc.mu.RLock()
|
|
sameClaim := acc.claimJWT != _EMPTY_ && acc.claimJWT == claimJWT && !acc.incomplete
|
|
acc.mu.RUnlock()
|
|
if sameClaim {
|
|
s.Debugf("Requested account update for [%s], same claims detected", acc.Name)
|
|
return nil
|
|
}
|
|
accClaims, _, err := s.verifyAccountClaims(claimJWT)
|
|
if err == nil && accClaims != nil {
|
|
acc.mu.Lock()
|
|
if acc.Issuer == _EMPTY_ {
|
|
acc.Issuer = accClaims.Issuer
|
|
}
|
|
if acc.Name != accClaims.Subject {
|
|
acc.mu.Unlock()
|
|
return ErrAccountValidation
|
|
}
|
|
acc.mu.Unlock()
|
|
s.UpdateAccountClaims(acc, accClaims)
|
|
acc.mu.Lock()
|
|
// needs to be set after update completed.
|
|
// This causes concurrent calls to return with sameClaim=true if the change is effective.
|
|
acc.claimJWT = claimJWT
|
|
acc.mu.Unlock()
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
// fetchRawAccountClaims will grab raw account claims iff we have a resolver.
|
|
// Lock is NOT held upon entry.
|
|
func (s *Server) fetchRawAccountClaims(name string) (string, error) {
|
|
accResolver := s.AccountResolver()
|
|
if accResolver == nil {
|
|
return _EMPTY_, ErrNoAccountResolver
|
|
}
|
|
// Need to do actual Fetch
|
|
start := time.Now()
|
|
claimJWT, err := fetchAccount(accResolver, name)
|
|
fetchTime := time.Since(start)
|
|
if fetchTime > time.Second {
|
|
s.Warnf("Account [%s] fetch took %v", name, fetchTime)
|
|
} else {
|
|
s.Debugf("Account [%s] fetch took %v", name, fetchTime)
|
|
}
|
|
if err != nil {
|
|
s.Warnf("Account fetch failed: %v", err)
|
|
return "", err
|
|
}
|
|
return claimJWT, nil
|
|
}
|
|
|
|
// fetchAccountClaims will attempt to fetch new claims if a resolver is present.
|
|
// Lock is NOT held upon entry.
|
|
func (s *Server) fetchAccountClaims(name string) (*jwt.AccountClaims, string, error) {
|
|
claimJWT, err := s.fetchRawAccountClaims(name)
|
|
if err != nil {
|
|
return nil, _EMPTY_, err
|
|
}
|
|
var claim *jwt.AccountClaims
|
|
claim, claimJWT, err = s.verifyAccountClaims(claimJWT)
|
|
if claim != nil && claim.Subject != name {
|
|
return nil, _EMPTY_, ErrAccountValidation
|
|
}
|
|
return claim, claimJWT, err
|
|
}
|
|
|
|
// verifyAccountClaims will decode and validate any account claims.
|
|
func (s *Server) verifyAccountClaims(claimJWT string) (*jwt.AccountClaims, string, error) {
|
|
accClaims, err := jwt.DecodeAccountClaims(claimJWT)
|
|
if err != nil {
|
|
return nil, _EMPTY_, err
|
|
}
|
|
if !s.isTrustedIssuer(accClaims.Issuer) {
|
|
return nil, _EMPTY_, ErrAccountValidation
|
|
}
|
|
vr := jwt.CreateValidationResults()
|
|
accClaims.Validate(vr)
|
|
if vr.IsBlocking(true) {
|
|
return nil, _EMPTY_, ErrAccountValidation
|
|
}
|
|
return accClaims, claimJWT, nil
|
|
}
|
|
|
|
// fetchAccount will fetch an account from a resolver if defined, build
// the internal account from the claims and register it.
// Lock is NOT held upon entry.
func (s *Server) fetchAccount(name string) (*Account, error) {
	accClaims, claimJWT, err := s.fetchAccountClaims(name)
	if accClaims == nil {
		return nil, err
	}
	acc := s.buildInternalAccount(accClaims)
	acc.claimJWT = claimJWT
	// Due to possible race, if registerAccount() returns a non
	// nil account, it means the same account was already
	// registered and we should use this one.
	if racc := s.registerAccount(acc); racc != nil {
		// Update with the new claims in case they are new.
		if err = s.updateAccountWithClaimJWT(racc, claimJWT); err != nil {
			return nil, err
		}
		return racc, nil
	}
	// The sub imports may have been setup but will not have had their
	// subscriptions properly setup. Do that here.
	if len(acc.imports.services) > 0 {
		if acc.ic == nil {
			acc.ic = s.createInternalAccountClient()
			acc.ic.acc = acc
		}
		acc.addAllServiceImportSubs()
	}
	return acc, nil
}
|
|
|
|
// Start up the server, this will block.
// Start via a Go routine if needed.
//
// Startup order matters here: system account and monitoring first, then
// resolver, JetStream, gateways, websocket, leaf nodes, MQTT, routing,
// and finally the blocking client accept loop.
func (s *Server) Start() {
	s.Noticef("Starting nats-server")

	gc := gitCommit
	if gc == "" {
		gc = "not set"
	}

	// Snapshot server options.
	opts := s.getOpts()

	s.Noticef(" Version: %s", VERSION)
	s.Noticef(" Git: [%s]", gc)
	s.Debugf(" Go build: %s", s.info.GoVersion)
	s.Noticef(" Name: %s", s.info.Name)
	if opts.JetStream {
		s.Noticef(" Node: %s", getHash(s.info.Name))
	}
	s.Noticef(" ID: %s", s.info.ID)

	defer s.Noticef("Server is ready")

	// Check for insecure configurations.
	s.checkAuthforWarnings()

	// Avoid RACE between Start() and Shutdown()
	s.mu.Lock()
	s.running = true
	s.mu.Unlock()

	s.grMu.Lock()
	s.grRunning = true
	s.grMu.Unlock()

	s.startIPQLogger()

	// Pprof http endpoint for the profiler.
	if opts.ProfPort != 0 {
		s.StartProfiler()
	}

	if opts.ConfigFile != _EMPTY_ {
		s.Noticef("Using configuration file: %s", opts.ConfigFile)
	}

	hasOperators := len(opts.TrustedOperators) > 0
	if hasOperators {
		s.Noticef("Trusted Operators")
	}
	for _, opc := range opts.TrustedOperators {
		s.Noticef(" System : %q", opc.Audience)
		s.Noticef(" Operator: %q", opc.Name)
		s.Noticef(" Issued : %v", time.Unix(opc.IssuedAt, 0))
		s.Noticef(" Expires : %v", time.Unix(opc.Expires, 0))
	}
	if hasOperators && opts.SystemAccount == _EMPTY_ {
		s.Warnf("Trusted Operators should utilize a System Account")
	}
	if opts.MaxPayload > MAX_PAYLOAD_MAX_SIZE {
		s.Warnf("Maximum payloads over %v are generally discouraged and could lead to poor performance",
			friendlyBytes(int64(MAX_PAYLOAD_MAX_SIZE)))
	}

	if len(opts.JsAccDefaultDomain) > 0 {
		s.Warnf("The option `default_js_domain` is a temporary backwards compatibility measure and will be removed")
	}

	// If we have a memory resolver, check the accounts here for validation exceptions.
	// This allows them to be logged right away vs when they are accessed via a client.
	if hasOperators && len(opts.resolverPreloads) > 0 {
		s.checkResolvePreloads()
	}

	// Log the pid to a file.
	if opts.PidFile != _EMPTY_ {
		if err := s.logPid(); err != nil {
			s.Fatalf("Could not write pidfile: %v", err)
			return
		}
	}

	// Setup system account which will start the eventing stack.
	if sa := opts.SystemAccount; sa != _EMPTY_ {
		if err := s.SetSystemAccount(sa); err != nil {
			s.Fatalf("Can't set system account: %v", err)
			return
		}
	} else if !opts.NoSystemAccount {
		// We will create a default system account here.
		s.SetDefaultSystemAccount()
	}

	// Start monitoring before enabling other subsystems of the
	// server to be able to monitor during startup.
	if err := s.StartMonitoring(); err != nil {
		s.Fatalf("Can't start monitoring: %v", err)
		return
	}

	// Start up resolver machinery.
	if ar := s.AccountResolver(); ar != nil {
		if err := ar.Start(s); err != nil {
			s.Fatalf("Could not start resolver: %v", err)
			return
		}
		// In operator mode, when the account resolver depends on an external system and
		// the system account is the bootstrapping account, start fetching it.
		if len(opts.TrustedOperators) == 1 && opts.SystemAccount != _EMPTY_ && opts.SystemAccount != DEFAULT_SYSTEM_ACCOUNT {
			_, isMemResolver := ar.(*MemAccResolver)
			if v, ok := s.accounts.Load(s.opts.SystemAccount); !isMemResolver && ok && v.(*Account).claimJWT == "" {
				s.Noticef("Using bootstrapping system account")
				// Retry once per second until the system account's claims can
				// be fetched and applied, or the server shuts down.
				s.startGoRoutine(func() {
					defer s.grWG.Done()
					t := time.NewTicker(time.Second)
					defer t.Stop()
					for {
						select {
						case <-s.quitCh:
							return
						case <-t.C:
							sacc := s.SystemAccount()
							if claimJWT, err := fetchAccount(ar, s.opts.SystemAccount); err != nil {
								continue
							} else if err = s.updateAccountWithClaimJWT(sacc, claimJWT); err != nil {
								continue
							}
							s.Noticef("System account fetched and updated")
							return
						}
					}
				})
			}
		}
	}

	// Start expiration of mapped GW replies, regardless if
	// this server is configured with gateway or not.
	s.startGWReplyMapExpiration()

	// Check if JetStream has been enabled. This needs to be after
	// the system account setup above. JetStream will create its
	// own system account if one is not present.
	if opts.JetStream {
		// Make sure someone is not trying to enable on the system account.
		if sa := s.SystemAccount(); sa != nil && sa.jsLimits != nil {
			s.Fatalf("Not allowed to enable JetStream on the system account")
		}
		cfg := &JetStreamConfig{
			StoreDir:  opts.StoreDir,
			MaxMemory: opts.JetStreamMaxMemory,
			MaxStore:  opts.JetStreamMaxStore,
			Domain:    opts.JetStreamDomain,
		}
		if err := s.EnableJetStream(cfg); err != nil {
			s.Fatalf("Can't start JetStream: %v", err)
			return
		}
	} else {
		// Check to see if any configured accounts have JetStream enabled.
		sa, ga := s.SystemAccount(), s.GlobalAccount()
		var hasSys, hasGlobal bool
		var total int

		s.accounts.Range(func(k, v interface{}) bool {
			total++
			acc := v.(*Account)
			if acc == sa {
				hasSys = true
			} else if acc == ga {
				hasGlobal = true
			}
			acc.mu.RLock()
			hasJs := acc.jsLimits != nil
			acc.mu.RUnlock()
			if hasJs {
				s.checkJetStreamExports()
				acc.enableAllJetStreamServiceImportsAndMappings()
			}
			return true
		})
		// If we only have the system account and the global account and we are not standalone,
		// go ahead and enable JS on $G in case we are in simple mixed mode setup.
		if total == 2 && hasSys && hasGlobal && !s.standAloneMode() {
			ga.mu.Lock()
			ga.jsLimits = dynamicJSAccountLimits
			ga.mu.Unlock()
			s.checkJetStreamExports()
			ga.enableAllJetStreamServiceImportsAndMappings()
		}
	}

	// Start OCSP Stapling monitoring for TLS certificates if enabled.
	s.startOCSPMonitoring()

	// Start up gateway if needed. Do this before starting the routes, because
	// we want to resolve the gateway host:port so that this information can
	// be sent to other routes.
	if opts.Gateway.Port != 0 {
		s.startGateways()
	}

	// Start websocket server if needed. Do this before starting the routes, and
	// leaf node because we want to resolve the gateway host:port so that this
	// information can be sent to other routes.
	if opts.Websocket.Port != 0 {
		s.startWebsocketServer()
	}

	// Start up listen if we want to accept leaf node connections.
	if opts.LeafNode.Port != 0 {
		// Will resolve or assign the advertise address for the leafnode listener.
		// We need that in StartRouting().
		s.startLeafNodeAcceptLoop()
	}

	// Solicit remote servers for leaf node connections.
	if len(opts.LeafNode.Remotes) > 0 {
		s.solicitLeafNodeRemotes(opts.LeafNode.Remotes)
	}

	// TODO (ik): I wanted to refactor this by starting the client
	// accept loop first, that is, it would resolve listen spec
	// in place, but start the accept-for-loop in a different go
	// routine. This would get rid of the synchronization between
	// this function and StartRouting, which I also would have wanted
	// to refactor, but both AcceptLoop() and StartRouting() have
	// been exported and not sure if that would break users using them.
	// We could mark them as deprecated and remove in a release or two...

	// The Routing routine needs to wait for the client listen
	// port to be opened and potential ephemeral port selected.
	clientListenReady := make(chan struct{})

	// MQTT
	if opts.MQTT.Port != 0 {
		s.startMQTT()
	}

	// Start up routing as well if needed.
	if opts.Cluster.Port != 0 {
		s.startGoRoutine(func() {
			s.StartRouting(clientListenReady)
		})
	}

	if opts.PortsFileDir != _EMPTY_ {
		s.logPorts()
	}

	if opts.TLSRateLimit > 0 {
		s.startGoRoutine(s.logRejectedTLSConns)
	}

	// Wait for clients. This blocks until shutdown.
	s.AcceptLoop(clientListenReady)
}
|
|
|
|
// Shutdown will shutdown the server instance by kicking out the AcceptLoop
// and closing all associated clients.
//
// Shutdown order matters: raft/ephemeral migration and eventing first
// (they need a live server to send messages), then JetStream, then
// listeners, connections and go routines. Safe to call multiple times
// and on a nil receiver.
func (s *Server) Shutdown() {
	if s == nil {
		return
	}
	// Transfer off any raft nodes that we are a leader by shutting them all down.
	s.shutdownRaftNodes()

	// This is for clustered JetStream and ephemeral consumers.
	// No-op if not clustered or not running JetStream.
	s.migrateEphemerals()

	// Shutdown the eventing system as needed.
	// This is done first to send out any messages for
	// account status. We will also clean up any
	// eventing items associated with accounts.
	s.shutdownEventing()

	s.mu.Lock()
	// Prevent issues with multiple calls.
	if s.shutdown {
		s.mu.Unlock()
		return
	}
	s.Noticef("Initiating Shutdown...")

	accRes := s.accResolver

	opts := s.getOpts()

	s.shutdown = true
	s.running = false
	s.grMu.Lock()
	s.grRunning = false
	s.grMu.Unlock()
	s.mu.Unlock()

	if accRes != nil {
		accRes.Close()
	}

	// Now check jetstream.
	s.shutdownJetStream()

	s.mu.Lock()
	conns := make(map[uint64]*client)

	// Copy off the clients
	for i, c := range s.clients {
		conns[i] = c
	}
	// Copy off the connections that are not yet registered
	// in s.routes, but for which the readLoop has started
	s.grMu.Lock()
	for i, c := range s.grTmpClients {
		conns[i] = c
	}
	s.grMu.Unlock()
	// Copy off the routes
	for i, r := range s.routes {
		conns[i] = r
	}
	// Copy off the gateways
	s.getAllGatewayConnections(conns)

	// Copy off the leaf nodes
	for i, c := range s.leafs {
		conns[i] = c
	}

	// Number of done channel responses we expect.
	doneExpected := 0

	// Kick client AcceptLoop()
	if s.listener != nil {
		doneExpected++
		s.listener.Close()
		s.listener = nil
	}

	// Kick websocket server
	if s.websocket.server != nil {
		doneExpected++
		s.websocket.server.Close()
		s.websocket.server = nil
		s.websocket.listener = nil
	}

	// Kick MQTT accept loop
	if s.mqtt.listener != nil {
		doneExpected++
		s.mqtt.listener.Close()
		s.mqtt.listener = nil
	}

	// Kick leafnodes AcceptLoop()
	if s.leafNodeListener != nil {
		doneExpected++
		s.leafNodeListener.Close()
		s.leafNodeListener = nil
	}

	// Kick route AcceptLoop()
	if s.routeListener != nil {
		doneExpected++
		s.routeListener.Close()
		s.routeListener = nil
	}

	// Kick Gateway AcceptLoop()
	if s.gatewayListener != nil {
		doneExpected++
		s.gatewayListener.Close()
		s.gatewayListener = nil
	}

	// Kick HTTP monitoring if its running
	if s.http != nil {
		doneExpected++
		s.http.Close()
		s.http = nil
	}

	// Kick Profiling if its running
	if s.profiler != nil {
		doneExpected++
		s.profiler.Close()
	}

	s.mu.Unlock()

	// Release go routines that wait on that channel
	close(s.quitCh)

	// Close client and route connections
	for _, c := range conns {
		c.setNoReconnect()
		c.closeConnection(ServerShutdown)
	}

	// Block until the accept loops exit; each one signals on s.done.
	for doneExpected > 0 {
		<-s.done
		doneExpected--
	}

	// Stop the IPQueue logger (before the grWG.Wait() call)
	if s.ipqLog != nil {
		s.ipqLog.stop()
	}

	// Wait for go routines to be done.
	s.grWG.Wait()

	if opts.PortsFileDir != _EMPTY_ {
		s.deletePortsFile(opts.PortsFileDir)
	}

	s.Noticef("Server Exiting..")
	// Close logger if applicable. It allows tests on Windows
	// to be able to do proper cleanup (delete log file).
	s.logging.RLock()
	log := s.logging.logger
	s.logging.RUnlock()
	if log != nil {
		if l, ok := log.(*logger.Logger); ok {
			l.Close()
		}
	}
	// Notify that the shutdown is complete
	close(s.shutdownComplete)
}
|
|
|
|
// WaitForShutdown will block until the server has been fully shutdown,
// i.e. until Shutdown() closes the shutdownComplete channel.
func (s *Server) WaitForShutdown() {
	<-s.shutdownComplete
}
|
|
|
|
// AcceptLoop is exported for easier testing.
//
// It opens the client listener, publishes the resolved host/port in the
// server INFO, then hands the listener to acceptConnections on its own
// go routine. clr is closed once the listener is ready (or on any early
// exit), which is what StartRouting waits on.
func (s *Server) AcceptLoop(clr chan struct{}) {
	// If we were to exit before the listener is setup properly,
	// make sure we close the channel.
	defer func() {
		if clr != nil {
			close(clr)
		}
	}()

	// Snapshot server options.
	opts := s.getOpts()

	// Setup state that can enable shutdown
	s.mu.Lock()
	if s.shutdown {
		s.mu.Unlock()
		return
	}

	hp := net.JoinHostPort(opts.Host, strconv.Itoa(opts.Port))
	l, e := natsListen("tcp", hp)
	s.listenerErr = e
	if e != nil {
		s.mu.Unlock()
		s.Fatalf("Error listening on port: %s, %q", hp, e)
		return
	}
	s.Noticef("Listening for client connections on %s",
		net.JoinHostPort(opts.Host, strconv.Itoa(l.Addr().(*net.TCPAddr).Port)))

	// Alert of TLS enabled.
	if opts.TLSConfig != nil {
		s.Noticef("TLS required for client connections")
	}

	// If server was started with RANDOM_PORT (-1), opts.Port would be equal
	// to 0 at the beginning this function. So we need to get the actual port
	if opts.Port == 0 {
		// Write resolved port back to options.
		opts.Port = l.Addr().(*net.TCPAddr).Port
	}

	// Now that port has been set (if it was set to RANDOM), set the
	// server's info Host/Port with either values from Options or
	// ClientAdvertise.
	if err := s.setInfoHostPort(); err != nil {
		s.Fatalf("Error setting server INFO with ClientAdvertise value of %s, err=%v", s.opts.ClientAdvertise, err)
		l.Close()
		s.mu.Unlock()
		return
	}
	// Keep track of client connect URLs. We may need them later.
	s.clientConnectURLs = s.getClientConnectURLs()
	s.listener = l

	// Lame duck mode: signal and wait for shutdown instead of treating
	// the accept error as transient.
	go s.acceptConnections(l, "Client", func(conn net.Conn) { s.createClient(conn) },
		func(_ error) bool {
			if s.isLameDuckMode() {
				// Signal that we are not accepting new clients
				s.ldmCh <- true
				// Now wait for the Shutdown...
				<-s.quitCh
				return true
			}
			return false
		})
	s.mu.Unlock()

	// Let the caller know that we are ready
	close(clr)
	clr = nil
}
|
|
|
|
func (s *Server) acceptConnections(l net.Listener, acceptName string, createFunc func(conn net.Conn), errFunc func(err error) bool) {
|
|
tmpDelay := ACCEPT_MIN_SLEEP
|
|
|
|
for {
|
|
conn, err := l.Accept()
|
|
if err != nil {
|
|
if errFunc != nil && errFunc(err) {
|
|
return
|
|
}
|
|
if tmpDelay = s.acceptError(acceptName, err, tmpDelay); tmpDelay < 0 {
|
|
break
|
|
}
|
|
continue
|
|
}
|
|
tmpDelay = ACCEPT_MIN_SLEEP
|
|
if !s.startGoRoutine(func() {
|
|
createFunc(conn)
|
|
s.grWG.Done()
|
|
}) {
|
|
conn.Close()
|
|
}
|
|
}
|
|
s.Debugf(acceptName + " accept loop exiting..")
|
|
s.done <- true
|
|
}
|
|
|
|
// This function sets the server's info Host/Port based on server Options.
|
|
// Note that this function may be called during config reload, this is why
|
|
// Host/Port may be reset to original Options if the ClientAdvertise option
|
|
// is not set (since it may have previously been).
|
|
func (s *Server) setInfoHostPort() error {
|
|
// When this function is called, opts.Port is set to the actual listen
|
|
// port (if option was originally set to RANDOM), even during a config
|
|
// reload. So use of s.opts.Port is safe.
|
|
if s.opts.ClientAdvertise != _EMPTY_ {
|
|
h, p, err := parseHostPort(s.opts.ClientAdvertise, s.opts.Port)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
s.info.Host = h
|
|
s.info.Port = p
|
|
} else {
|
|
s.info.Host = s.opts.Host
|
|
s.info.Port = s.opts.Port
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// StartProfiler is called to enable dynamic profiling. It serves the
// net/http/pprof handlers (registered on http.DefaultServeMux by the
// blank pprof import at the top of the file) on the configured ProfPort
// and enables block profiling.
func (s *Server) StartProfiler() {
	// Snapshot server options.
	opts := s.getOpts()

	port := opts.ProfPort

	// Check for Random Port
	if port == -1 {
		port = 0
	}

	s.mu.Lock()
	if s.shutdown {
		s.mu.Unlock()
		return
	}
	hp := net.JoinHostPort(opts.Host, strconv.Itoa(port))

	l, err := net.Listen("tcp", hp)

	if err != nil {
		s.mu.Unlock()
		s.Fatalf("error starting profiler: %s", err)
		return
	}
	s.Noticef("profiling port: %d", l.Addr().(*net.TCPAddr).Port)

	srv := &http.Server{
		Addr:           hp,
		Handler:        http.DefaultServeMux,
		MaxHeaderBytes: 1 << 20,
	}
	// Store listener/server so Shutdown() can close them.
	s.profiler = l
	s.profilingServer = srv

	// Enable blocking profile
	runtime.SetBlockProfileRate(1)

	go func() {
		// if this errors out, it's probably because the server is being shutdown
		err := srv.Serve(l)
		if err != nil {
			s.mu.Lock()
			shutdown := s.shutdown
			s.mu.Unlock()
			// Serve always returns a non-nil error; only fatal if we
			// were not shutting down.
			if !shutdown {
				s.Fatalf("error starting profiler: %s", err)
			}
		}
		srv.Close()
		s.done <- true
	}()
	// Note: the serving goroutine is launched while the server lock is
	// still held; it does not touch s state before acquiring the lock.
	s.mu.Unlock()
}
|
|
|
|
// StartHTTPMonitoring will enable the HTTP monitoring port.
// Any error from the underlying startMonitoring call is ignored here.
//
// Deprecated: use StartMonitoring instead.
func (s *Server) StartHTTPMonitoring() {
	s.startMonitoring(false)
}
|
|
|
|
// StartHTTPSMonitoring will enable the HTTPS monitoring port.
// Any error from the underlying startMonitoring call is ignored here.
//
// Deprecated: use StartMonitoring instead.
func (s *Server) StartHTTPSMonitoring() {
	s.startMonitoring(true)
}
|
|
|
|
// StartMonitoring starts the HTTP or HTTPs server if needed.
|
|
func (s *Server) StartMonitoring() error {
|
|
// Snapshot server options.
|
|
opts := s.getOpts()
|
|
|
|
// Specifying both HTTP and HTTPS ports is a misconfiguration
|
|
if opts.HTTPPort != 0 && opts.HTTPSPort != 0 {
|
|
return fmt.Errorf("can't specify both HTTP (%v) and HTTPs (%v) ports", opts.HTTPPort, opts.HTTPSPort)
|
|
}
|
|
var err error
|
|
if opts.HTTPPort != 0 {
|
|
err = s.startMonitoring(false)
|
|
} else if opts.HTTPSPort != 0 {
|
|
if opts.TLSConfig == nil {
|
|
return fmt.Errorf("TLS cert and key required for HTTPS")
|
|
}
|
|
err = s.startMonitoring(true)
|
|
}
|
|
return err
|
|
}
|
|
|
|
// HTTP endpoints served by the monitoring server.
const (
	// RootPath is the monitoring root page.
	RootPath = "/"
	// VarzPath serves general server information.
	VarzPath = "/varz"
	// ConnzPath serves connection information.
	ConnzPath = "/connz"
	// RoutezPath serves route information.
	RoutezPath = "/routez"
	// GatewayzPath serves gateway information.
	GatewayzPath = "/gatewayz"
	// LeafzPath serves leaf node information.
	LeafzPath = "/leafz"
	// SubszPath serves subscription information.
	SubszPath = "/subsz"
	// StackszPath serves goroutine stack traces.
	StackszPath = "/stacksz"
	// AccountzPath serves account information.
	AccountzPath = "/accountz"
	// JszPath serves JetStream information.
	JszPath = "/jsz"
	// HealthzPath serves the health check endpoint.
	HealthzPath = "/healthz"
)
|
|
|
|
func (s *Server) basePath(p string) string {
|
|
return path.Join(s.httpBasePath, p)
|
|
}
|
|
|
|
type captureHTTPServerLog struct {
|
|
s *Server
|
|
prefix string
|
|
}
|
|
|
|
func (cl *captureHTTPServerLog) Write(p []byte) (int, error) {
|
|
var buf [128]byte
|
|
var b = buf[:0]
|
|
|
|
b = append(b, []byte(cl.prefix)...)
|
|
offset := 0
|
|
if bytes.HasPrefix(p, []byte("http:")) {
|
|
offset = 6
|
|
}
|
|
b = append(b, p[offset:]...)
|
|
cl.s.Errorf(string(b))
|
|
return len(p), nil
|
|
}
|
|
|
|
// The TLS configuration is passed to the listener when the monitoring
|
|
// "server" is setup. That prevents TLS configuration updates on reload
|
|
// from being used. By setting this function in tls.Config.GetConfigForClient
|
|
// we instruct the TLS handshake to ask for the tls configuration to be
|
|
// used for a specific client. We don't care which client, we always use
|
|
// the same TLS configuration.
|
|
func (s *Server) getTLSConfig(_ *tls.ClientHelloInfo) (*tls.Config, error) {
|
|
opts := s.getOpts()
|
|
return opts.TLSConfig, nil
|
|
}
|
|
|
|
// Start the monitoring server. When secure is true, the listener is a
// TLS listener built from the server's TLS config (client certificates
// are not requested, and config reloads are honored via
// GetConfigForClient). Registers all monitoring endpoints on a fresh
// mux and serves until shutdown.
func (s *Server) startMonitoring(secure bool) error {
	// Snapshot server options.
	opts := s.getOpts()

	var (
		hp           string
		err          error
		httpListener net.Listener
		port         int
	)

	monitorProtocol := "http"

	if secure {
		monitorProtocol += "s"
		port = opts.HTTPSPort
		// -1 means pick a random port.
		if port == -1 {
			port = 0
		}
		hp = net.JoinHostPort(opts.HTTPHost, strconv.Itoa(port))
		config := opts.TLSConfig.Clone()
		// Defer TLS config resolution to handshake time so a config
		// reload is picked up by new connections (see getTLSConfig).
		config.GetConfigForClient = s.getTLSConfig
		config.ClientAuth = tls.NoClientCert
		httpListener, err = tls.Listen("tcp", hp, config)

	} else {
		port = opts.HTTPPort
		// -1 means pick a random port.
		if port == -1 {
			port = 0
		}
		hp = net.JoinHostPort(opts.HTTPHost, strconv.Itoa(port))
		httpListener, err = net.Listen("tcp", hp)
	}

	if err != nil {
		return fmt.Errorf("can't listen to the monitor port: %v", err)
	}

	rport := httpListener.Addr().(*net.TCPAddr).Port
	s.Noticef("Starting %s monitor on %s", monitorProtocol, net.JoinHostPort(opts.HTTPHost, strconv.Itoa(rport)))

	mux := http.NewServeMux()

	// Root
	mux.HandleFunc(s.basePath(RootPath), s.HandleRoot)
	// Varz
	mux.HandleFunc(s.basePath(VarzPath), s.HandleVarz)
	// Connz
	mux.HandleFunc(s.basePath(ConnzPath), s.HandleConnz)
	// Routez
	mux.HandleFunc(s.basePath(RoutezPath), s.HandleRoutez)
	// Gatewayz
	mux.HandleFunc(s.basePath(GatewayzPath), s.HandleGatewayz)
	// Leafz
	mux.HandleFunc(s.basePath(LeafzPath), s.HandleLeafz)
	// Subz
	mux.HandleFunc(s.basePath(SubszPath), s.HandleSubsz)
	// Subz alias for backwards compatibility
	mux.HandleFunc(s.basePath("/subscriptionsz"), s.HandleSubsz)
	// Stacksz
	mux.HandleFunc(s.basePath(StackszPath), s.HandleStacksz)
	// Accountz
	mux.HandleFunc(s.basePath(AccountzPath), s.HandleAccountz)
	// Jsz
	mux.HandleFunc(s.basePath(JszPath), s.HandleJsz)
	// Healthz
	mux.HandleFunc(s.basePath(HealthzPath), s.HandleHealthz)

	// Do not set a WriteTimeout because it could cause cURL/browser
	// to return empty response or unable to display page if the
	// server needs more time to build the response.
	srv := &http.Server{
		Addr:           hp,
		Handler:        mux,
		MaxHeaderBytes: 1 << 20,
		ErrorLog:       log.New(&captureHTTPServerLog{s, "monitoring: "}, _EMPTY_, 0),
	}
	s.mu.Lock()
	// The server may have been shut down while we were setting up.
	if s.shutdown {
		httpListener.Close()
		s.mu.Unlock()
		return nil
	}
	s.http = httpListener
	s.httpHandler = mux
	s.monitoringServer = srv
	s.mu.Unlock()

	go func() {
		// Serve always returns a non-nil error; it is fatal only when
		// we were not shutting down.
		if err := srv.Serve(httpListener); err != nil {
			s.mu.Lock()
			shutdown := s.shutdown
			s.mu.Unlock()
			if !shutdown {
				s.Fatalf("Error starting monitor on %q: %v", hp, err)
			}
		}
		srv.Close()
		s.mu.Lock()
		s.httpHandler = nil
		s.mu.Unlock()
		s.done <- true
	}()

	return nil
}
|
|
|
|
// HTTPHandler returns the http.Handler object used to handle monitoring
|
|
// endpoints. It will return nil if the server is not configured for
|
|
// monitoring, or if the server has not been started yet (Server.Start()).
|
|
func (s *Server) HTTPHandler() http.Handler {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return s.httpHandler
|
|
}
|
|
|
|
// Perform a conditional deep copy due to reference nature of [Client|WS]ConnectURLs.
|
|
// If updates are made to Info, this function should be consulted and updated.
|
|
// Assume lock is held.
|
|
func (s *Server) copyInfo() Info {
|
|
info := s.info
|
|
if len(info.ClientConnectURLs) > 0 {
|
|
info.ClientConnectURLs = append([]string(nil), s.info.ClientConnectURLs...)
|
|
}
|
|
if len(info.WSConnectURLs) > 0 {
|
|
info.WSConnectURLs = append([]string(nil), s.info.WSConnectURLs...)
|
|
}
|
|
return info
|
|
}
|
|
|
|
// tlsMixConn is used when we can receive both TLS and non-TLS connections on same port.
|
|
type tlsMixConn struct {
|
|
net.Conn
|
|
pre *bytes.Buffer
|
|
}
|
|
|
|
// Read for our mixed multi-reader.
|
|
func (c *tlsMixConn) Read(b []byte) (int, error) {
|
|
if c.pre != nil {
|
|
n, err := c.pre.Read(b)
|
|
if c.pre.Len() == 0 {
|
|
c.pre = nil
|
|
}
|
|
return n, err
|
|
}
|
|
return c.Conn.Read(b)
|
|
}
|
|
|
|
// createClient sets up a new client connection accepted on the main
// listener: it sends the INFO protocol, optionally performs the TLS
// handshake (supporting mixed TLS/non-TLS ports), registers the client
// with the server, and spins up the read and write loops.
// Returns nil when the connection is rejected (max connections, TLS
// failure, throttling); otherwise returns the (possibly closed) client.
func (s *Server) createClient(conn net.Conn) *client {
	// Snapshot server options.
	opts := s.getOpts()

	maxPay := int32(opts.MaxPayload)
	maxSubs := int32(opts.MaxSubs)
	// For system, maxSubs of 0 means unlimited, so re-adjust here.
	if maxSubs == 0 {
		maxSubs = -1
	}
	now := time.Now().UTC()

	c := &client{srv: s, nc: conn, opts: defaultOpts, mpay: maxPay, msubs: maxSubs, start: now, last: now}

	c.registerWithAccount(s.globalAccount())

	var info Info
	var authRequired bool

	s.mu.Lock()
	// Grab JSON info string
	info = s.copyInfo()
	if s.nonceRequired() {
		// Nonce handling
		var raw [nonceLen]byte
		nonce := raw[:]
		s.generateNonce(nonce)
		info.Nonce = string(nonce)
	}
	c.nonce = []byte(info.Nonce)
	authRequired = info.AuthRequired

	s.totalClients++
	s.mu.Unlock()

	// Grab lock
	c.mu.Lock()
	if authRequired {
		c.flags.set(expectConnect)
	}

	// Initialize
	c.initClient()

	c.Debugf("Client connection created")

	// Send our information.
	// Need to be sent in place since writeLoop cannot be started until
	// TLS handshake is done (if applicable).
	c.sendProtoNow(c.generateClientInfoJSON(info))

	// Unlock to register
	c.mu.Unlock()

	// Register with the server.
	s.mu.Lock()
	// If server is not running, Shutdown() may have already gathered the
	// list of connections to close. It won't contain this one, so we need
	// to bail out now otherwise the readLoop started down there would not
	// be interrupted. Skip also if in lame duck mode.
	if !s.running || s.ldm {
		// There are some tests that create a server but don't start it,
		// and use "async" clients and perform the parsing manually. Such
		// clients would branch here (since server is not running). However,
		// when a server was really running and has been shutdown, we must
		// close this connection.
		if s.shutdown {
			conn.Close()
		}
		s.mu.Unlock()
		return c
	}

	// If there is a max connections specified, check that adding
	// this new client would not push us over the max
	if opts.MaxConn > 0 && len(s.clients) >= opts.MaxConn {
		s.mu.Unlock()
		c.maxConnExceeded()
		return nil
	}
	s.clients[c.cid] = c

	tlsRequired := info.TLSRequired
	s.mu.Unlock()

	// Re-Grab lock
	c.mu.Lock()

	// Connection could have been closed while sending the INFO proto.
	isClosed := c.isClosed()

	var pre []byte
	// If we have both TLS and non-TLS allowed we need to see which
	// one the client wants. Peek at the first bytes: a TLS handshake
	// record starts with byte 0x16.
	if !isClosed && opts.TLSConfig != nil && opts.AllowNonTLS {
		pre = make([]byte, 4)
		c.nc.SetReadDeadline(time.Now().Add(secondsToDuration(opts.TLSTimeout)))
		n, _ := io.ReadFull(c.nc, pre[:])
		c.nc.SetReadDeadline(time.Time{})
		pre = pre[:n]
		if n > 0 && pre[0] == 0x16 {
			tlsRequired = true
		} else {
			tlsRequired = false
		}
	}

	// Check for TLS
	if !isClosed && tlsRequired {
		if s.connRateCounter != nil && !s.connRateCounter.allow() {
			c.mu.Unlock()
			c.sendErr("Connection throttling is active. Please try again later.")
			c.closeConnection(MaxConnectionsExceeded)
			return nil
		}

		// If we have a prebuffer create a multi-reader.
		if len(pre) > 0 {
			c.nc = &tlsMixConn{c.nc, bytes.NewBuffer(pre)}
			// Clear pre so it is not parsed.
			pre = nil
		}
		// Performs server-side TLS handshake.
		if err := c.doTLSServerHandshake(_EMPTY_, opts.TLSConfig, opts.TLSTimeout, opts.TLSPinnedCerts); err != nil {
			c.mu.Unlock()
			return nil
		}
	}

	// If connection is marked as closed, bail out.
	if isClosed {
		c.mu.Unlock()
		// Connection could have been closed due to TLS timeout or while trying
		// to send the INFO protocol. We need to call closeConnection() to make
		// sure that proper cleanup is done.
		c.closeConnection(WriteError)
		return nil
	}

	// Check for Auth. We schedule this timer after the TLS handshake to avoid
	// the race where the timer fires during the handshake and causes the
	// server to write bad data to the socket. See issue #432.
	if authRequired {
		c.setAuthTimer(secondsToDuration(opts.AuthTimeout))
	}

	// Do final client initialization

	// Set the Ping timer. Will be reset once connect was received.
	c.setPingTimer()

	// Spin up the read loop.
	s.startGoRoutine(func() { c.readLoop(pre) })

	// Spin up the write loop.
	s.startGoRoutine(func() { c.writeLoop() })

	if tlsRequired {
		c.Debugf("TLS handshake complete")
		cs := c.nc.(*tls.Conn).ConnectionState()
		c.Debugf("TLS version %s, cipher suite %s", tlsVersion(cs.Version), tlsCipher(cs.CipherSuite))
	}

	c.mu.Unlock()

	return c
}
|
|
|
|
// This will save off a closed client in a ring buffer such that
// /connz can inspect. Useful for debugging, etc.
func (s *Server) saveClosedClient(c *client, nc net.Conn, reason ClosedState) {
	now := time.Now().UTC()

	// Emit the account disconnect event before taking the client lock.
	s.accountDisconnectEvent(c, now, reason.String())

	c.mu.Lock()

	// Snapshot the client state into a closedClient record.
	cc := &closedClient{}
	cc.fill(c, nc, now)
	cc.Stop = &now
	cc.Reason = reason.String()

	// Do subs, do not place by default in main ConnInfo
	if len(c.subs) > 0 {
		cc.subs = make([]SubDetail, 0, len(c.subs))
		for _, sub := range c.subs {
			cc.subs = append(cc.subs, newSubDetail(sub))
		}
	}
	// Hold user as well.
	cc.user = c.opts.Username
	// Hold account name if not the global account.
	if c.acc != nil && c.acc.Name != globalAccountName {
		cc.acc = c.acc.Name
	}
	cc.JWT = c.opts.JWT
	cc.IssuerKey = issuerForClient(c)
	cc.Tags = c.tags
	cc.NameTag = c.nameTag
	c.mu.Unlock()

	// Place in the ring buffer
	s.mu.Lock()
	if s.closed != nil {
		s.closed.append(cc)
	}
	s.mu.Unlock()
}
|
|
|
|
// Adds to the list of client and websocket clients connect URLs.
|
|
// If there was a change, an INFO protocol is sent to registered clients
|
|
// that support async INFO protocols.
|
|
func (s *Server) addConnectURLsAndSendINFOToClients(curls, wsurls []string) {
|
|
s.updateServerINFOAndSendINFOToClients(curls, wsurls, true)
|
|
}
|
|
|
|
// Removes from the list of client and websocket clients connect URLs.
|
|
// If there was a change, an INFO protocol is sent to registered clients
|
|
// that support async INFO protocols.
|
|
func (s *Server) removeConnectURLsAndSendINFOToClients(curls, wsurls []string) {
|
|
s.updateServerINFOAndSendINFOToClients(curls, wsurls, false)
|
|
}
|
|
|
|
// Updates the list of client and websocket clients connect URLs and if any change
// sends an async INFO update to clients that support it.
// The URL maps are reference counted, so a URL is only considered
// added/removed when its count transitions (see refCountedUrlSet).
func (s *Server) updateServerINFOAndSendINFOToClients(curls, wsurls []string, add bool) {
	s.mu.Lock()
	defer s.mu.Unlock()

	remove := !add
	// Will return true if we need alter the server's Info object.
	updateMap := func(urls []string, m refCountedUrlSet) bool {
		wasUpdated := false
		for _, url := range urls {
			if add && m.addUrl(url) {
				wasUpdated = true
			} else if remove && m.removeUrl(url) {
				wasUpdated = true
			}
		}
		return wasUpdated
	}
	cliUpdated := updateMap(curls, s.clientConnectURLsMap)
	wsUpdated := updateMap(wsurls, s.websocket.connectURLsMap)

	// Rebuild an Info URL slice from this server's own URLs plus the map.
	updateInfo := func(infoURLs *[]string, urls []string, m refCountedUrlSet) {
		// Recreate the info's slice from the map
		*infoURLs = (*infoURLs)[:0]
		// Add this server client connect ULRs first...
		*infoURLs = append(*infoURLs, urls...)
		// Then the ones from the map
		for url := range m {
			*infoURLs = append(*infoURLs, url)
		}
	}
	if cliUpdated {
		updateInfo(&s.info.ClientConnectURLs, s.clientConnectURLs, s.clientConnectURLsMap)
	}
	if wsUpdated {
		updateInfo(&s.info.WSConnectURLs, s.websocket.connectURLs, s.websocket.connectURLsMap)
	}
	if cliUpdated || wsUpdated {
		// Update the time of this update
		s.lastCURLsUpdate = time.Now().UnixNano()
		// Send to all registered clients that support async INFO protocols.
		s.sendAsyncInfoToClients(cliUpdated, wsUpdated)
	}
}
|
|
|
|
// Handle closing down a connection when the handshake has timedout.
|
|
func tlsTimeout(c *client, conn *tls.Conn) {
|
|
c.mu.Lock()
|
|
closed := c.isClosed()
|
|
c.mu.Unlock()
|
|
// Check if already closed
|
|
if closed {
|
|
return
|
|
}
|
|
cs := conn.ConnectionState()
|
|
if !cs.HandshakeComplete {
|
|
c.Errorf("TLS handshake timeout")
|
|
c.sendErr("Secure Connection - TLS Required")
|
|
c.closeConnection(TLSHandshakeError)
|
|
}
|
|
}
|
|
|
|
// Seems silly we have to write these
|
|
func tlsVersion(ver uint16) string {
|
|
switch ver {
|
|
case tls.VersionTLS10:
|
|
return "1.0"
|
|
case tls.VersionTLS11:
|
|
return "1.1"
|
|
case tls.VersionTLS12:
|
|
return "1.2"
|
|
case tls.VersionTLS13:
|
|
return "1.3"
|
|
}
|
|
return fmt.Sprintf("Unknown [0x%x]", ver)
|
|
}
|
|
|
|
// We use hex here so we don't need multiple versions
|
|
func tlsCipher(cs uint16) string {
|
|
name, present := cipherMapByID[cs]
|
|
if present {
|
|
return name
|
|
}
|
|
return fmt.Sprintf("Unknown [0x%x]", cs)
|
|
}
|
|
|
|
// Remove a client or route from our internal accounting.
func (s *Server) removeClient(c *client) {
	// kind is immutable, so can check without lock
	switch c.kind {
	case CLIENT:
		// Read the fields we need under the client lock, then release it
		// before taking the server lock (lock ordering: never hold both).
		c.mu.Lock()
		cid := c.cid
		updateProtoInfoCount := false
		if c.kind == CLIENT && c.opts.Protocol >= ClientProtoInfo {
			updateProtoInfoCount = true
		}
		c.mu.Unlock()

		s.mu.Lock()
		delete(s.clients, cid)
		// Keep the count of INFO-capable clients in sync.
		if updateProtoInfoCount {
			s.cproto--
		}
		s.mu.Unlock()
	case ROUTER:
		s.removeRoute(c)
	case GATEWAY:
		s.removeRemoteGatewayConnection(c)
	case LEAF:
		s.removeLeafNodeConnection(c)
	}
}
|
|
|
|
func (s *Server) removeFromTempClients(cid uint64) {
|
|
s.grMu.Lock()
|
|
delete(s.grTmpClients, cid)
|
|
s.grMu.Unlock()
|
|
}
|
|
|
|
func (s *Server) addToTempClients(cid uint64, c *client) bool {
|
|
added := false
|
|
s.grMu.Lock()
|
|
if s.grRunning {
|
|
s.grTmpClients[cid] = c
|
|
added = true
|
|
}
|
|
s.grMu.Unlock()
|
|
return added
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
// These are some helpers for accounting in functional tests.
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
// NumRoutes will report the number of registered routes.
|
|
func (s *Server) NumRoutes() int {
|
|
s.mu.Lock()
|
|
nr := len(s.routes)
|
|
s.mu.Unlock()
|
|
return nr
|
|
}
|
|
|
|
// NumRemotes will report number of registered remotes.
|
|
func (s *Server) NumRemotes() int {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return len(s.remotes)
|
|
}
|
|
|
|
// NumLeafNodes will report number of leaf node connections.
|
|
func (s *Server) NumLeafNodes() int {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return len(s.leafs)
|
|
}
|
|
|
|
// NumClients will report the number of registered clients.
|
|
func (s *Server) NumClients() int {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return len(s.clients)
|
|
}
|
|
|
|
// GetClient will return the client associated with cid.
|
|
func (s *Server) GetClient(cid uint64) *client {
|
|
return s.getClient(cid)
|
|
}
|
|
|
|
// getClient will return the client associated with cid.
|
|
func (s *Server) getClient(cid uint64) *client {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return s.clients[cid]
|
|
}
|
|
|
|
// GetLeafNode returns the leafnode associated with the cid.
|
|
func (s *Server) GetLeafNode(cid uint64) *client {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return s.leafs[cid]
|
|
}
|
|
|
|
// NumSubscriptions will report how many subscriptions are active.
|
|
func (s *Server) NumSubscriptions() uint32 {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return s.numSubscriptions()
|
|
}
|
|
|
|
// numSubscriptions will report how many subscriptions are active.
|
|
// Lock should be held.
|
|
func (s *Server) numSubscriptions() uint32 {
|
|
var subs int
|
|
s.accounts.Range(func(k, v interface{}) bool {
|
|
acc := v.(*Account)
|
|
if acc.sl != nil {
|
|
subs += acc.TotalSubs()
|
|
}
|
|
return true
|
|
})
|
|
return uint32(subs)
|
|
}
|
|
|
|
// NumSlowConsumers will report the number of slow consumers.
|
|
func (s *Server) NumSlowConsumers() int64 {
|
|
return atomic.LoadInt64(&s.slowConsumers)
|
|
}
|
|
|
|
// ConfigTime will report the last time the server configuration was loaded.
|
|
func (s *Server) ConfigTime() time.Time {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return s.configTime
|
|
}
|
|
|
|
// Addr will return the net.Addr object for the current listener.
|
|
func (s *Server) Addr() net.Addr {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if s.listener == nil {
|
|
return nil
|
|
}
|
|
return s.listener.Addr()
|
|
}
|
|
|
|
// MonitorAddr will return the net.Addr object for the monitoring listener.
|
|
func (s *Server) MonitorAddr() *net.TCPAddr {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if s.http == nil {
|
|
return nil
|
|
}
|
|
return s.http.Addr().(*net.TCPAddr)
|
|
}
|
|
|
|
// ClusterAddr returns the net.Addr object for the route listener.
|
|
func (s *Server) ClusterAddr() *net.TCPAddr {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if s.routeListener == nil {
|
|
return nil
|
|
}
|
|
return s.routeListener.Addr().(*net.TCPAddr)
|
|
}
|
|
|
|
// ProfilerAddr returns the net.Addr object for the profiler listener.
|
|
func (s *Server) ProfilerAddr() *net.TCPAddr {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if s.profiler == nil {
|
|
return nil
|
|
}
|
|
return s.profiler.Addr().(*net.TCPAddr)
|
|
}
|
|
|
|
// readyForConnections polls (every 25ms, for up to d) until every
// configured listener (client, route, gateway, leaf node, websocket,
// mqtt) is up. Returns nil when all are ready; otherwise an error
// naming each listener that failed to come up (with its listen error,
// if any).
func (s *Server) readyForConnections(d time.Duration) error {
	// Snapshot server options.
	opts := s.getOpts()

	type info struct {
		ok  bool
		err error
	}
	chk := make(map[string]info)

	end := time.Now().Add(d)
	for time.Now().Before(end) {
		// A listener is "ok" either when it exists or when its feature
		// is not configured at all.
		s.mu.Lock()
		chk["server"] = info{ok: s.listener != nil, err: s.listenerErr}
		chk["route"] = info{ok: (opts.Cluster.Port == 0 || s.routeListener != nil), err: s.routeListenerErr}
		chk["gateway"] = info{ok: (opts.Gateway.Name == _EMPTY_ || s.gatewayListener != nil), err: s.gatewayListenerErr}
		chk["leafNode"] = info{ok: (opts.LeafNode.Port == 0 || s.leafNodeListener != nil), err: s.leafNodeListenerErr}
		chk["websocket"] = info{ok: (opts.Websocket.Port == 0 || s.websocket.listener != nil), err: s.websocket.listenerErr}
		chk["mqtt"] = info{ok: (opts.MQTT.Port == 0 || s.mqtt.listener != nil), err: s.mqtt.listenerErr}
		s.mu.Unlock()

		var numOK int
		for _, inf := range chk {
			if inf.ok {
				numOK++
			}
		}
		if numOK == len(chk) {
			return nil
		}
		// Only sleep between polls when the overall wait allows it.
		if d > 25*time.Millisecond {
			time.Sleep(25 * time.Millisecond)
		}
	}

	// Timed out: build the failure report from the last snapshot.
	failed := make([]string, 0, len(chk))
	for name, inf := range chk {
		if inf.ok && inf.err != nil {
			failed = append(failed, fmt.Sprintf("%s(ok, but %s)", name, inf.err))
		}
		if !inf.ok && inf.err == nil {
			failed = append(failed, name)
		}
		if !inf.ok && inf.err != nil {
			failed = append(failed, fmt.Sprintf("%s(%s)", name, inf.err))
		}
	}

	return fmt.Errorf(
		"failed to be ready for connections after %s: %s",
		d, strings.Join(failed, ", "),
	)
}
|
|
|
|
// ReadyForConnections returns `true` if the server is ready to accept clients
|
|
// and, if routing is enabled, route connections. If after the duration
|
|
// `dur` the server is still not ready, returns `false`.
|
|
func (s *Server) ReadyForConnections(dur time.Duration) bool {
|
|
return s.readyForConnections(dur) == nil
|
|
}
|
|
|
|
// Quick utility to function to tell if the server supports headers.
|
|
func (s *Server) supportsHeaders() bool {
|
|
if s == nil {
|
|
return false
|
|
}
|
|
return !(s.getOpts().NoHeaderSupport)
|
|
}
|
|
|
|
// ID returns the server's ID
|
|
func (s *Server) ID() string {
|
|
return s.info.ID
|
|
}
|
|
|
|
// NodeName returns the node name for this server.
|
|
func (s *Server) NodeName() string {
|
|
return string(getHash(s.info.Name))
|
|
}
|
|
|
|
// Name returns the server's name. This will be the same as the ID if it was not set.
|
|
func (s *Server) Name() string {
|
|
return s.info.Name
|
|
}
|
|
|
|
func (s *Server) String() string {
|
|
return s.info.Name
|
|
}
|
|
|
|
func (s *Server) startGoRoutine(f func()) bool {
|
|
var started bool
|
|
s.grMu.Lock()
|
|
if s.grRunning {
|
|
s.grWG.Add(1)
|
|
go f()
|
|
started = true
|
|
}
|
|
s.grMu.Unlock()
|
|
return started
|
|
}
|
|
|
|
func (s *Server) numClosedConns() int {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return s.closed.len()
|
|
}
|
|
|
|
func (s *Server) totalClosedConns() uint64 {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return s.closed.totalConns()
|
|
}
|
|
|
|
func (s *Server) closedClients() []*closedClient {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
return s.closed.closedClients()
|
|
}
|
|
|
|
// getClientConnectURLs returns suitable URLs for clients to connect to the listen
|
|
// port based on the server options' Host and Port. If the Host corresponds to
|
|
// "any" interfaces, this call returns the list of resolved IP addresses.
|
|
// If ClientAdvertise is set, returns the client advertise host and port.
|
|
// The server lock is assumed held on entry.
|
|
func (s *Server) getClientConnectURLs() []string {
|
|
// Snapshot server options.
|
|
opts := s.getOpts()
|
|
// Ignore error here since we know that if there is client advertise, the
|
|
// parseHostPort is correct because we did it right before calling this
|
|
// function in Server.New().
|
|
urls, _ := s.getConnectURLs(opts.ClientAdvertise, opts.Host, opts.Port)
|
|
return urls
|
|
}
|
|
|
|
// Generic version that will return an array of URLs based on the given
|
|
// advertise, host and port values.
|
|
func (s *Server) getConnectURLs(advertise, host string, port int) ([]string, error) {
|
|
urls := make([]string, 0, 1)
|
|
|
|
// short circuit if advertise is set
|
|
if advertise != "" {
|
|
h, p, err := parseHostPort(advertise, port)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
urls = append(urls, net.JoinHostPort(h, strconv.Itoa(p)))
|
|
} else {
|
|
sPort := strconv.Itoa(port)
|
|
_, ips, err := s.getNonLocalIPsIfHostIsIPAny(host, true)
|
|
for _, ip := range ips {
|
|
urls = append(urls, net.JoinHostPort(ip, sPort))
|
|
}
|
|
if err != nil || len(urls) == 0 {
|
|
// We are here if s.opts.Host is not "0.0.0.0" nor "::", or if for some
|
|
// reason we could not add any URL in the loop above.
|
|
// We had a case where a Windows VM was hosed and would have err == nil
|
|
// and not add any address in the array in the loop above, and we
|
|
// ended-up returning 0.0.0.0, which is problematic for Windows clients.
|
|
// Check for 0.0.0.0 or :: specifically, and ignore if that's the case.
|
|
if host == "0.0.0.0" || host == "::" {
|
|
s.Errorf("Address %q can not be resolved properly", host)
|
|
} else {
|
|
urls = append(urls, net.JoinHostPort(host, sPort))
|
|
}
|
|
}
|
|
}
|
|
return urls, nil
|
|
}
|
|
|
|
// Returns an array of non local IPs if the provided host is
// 0.0.0.0 or ::. It returns the first resolved if `all` is
// false.
// The boolean indicate if the provided host was 0.0.0.0 (or ::)
// so that if the returned array is empty caller can decide
// what to do next.
func (s *Server) getNonLocalIPsIfHostIsIPAny(host string, all bool) (bool, []string, error) {
	ip := net.ParseIP(host)
	// If this is not an IP, we are done
	if ip == nil {
		return false, nil, nil
	}
	// If this is not 0.0.0.0 or :: we have nothing to do.
	if !ip.IsUnspecified() {
		return false, nil, nil
	}
	s.Debugf("Get non local IPs for %q", host)
	var ips []string
	ifaces, _ := net.Interfaces()
	for _, i := range ifaces {
		addrs, _ := i.Addrs()
		for _, addr := range addrs {
			switch v := addr.(type) {
			case *net.IPNet:
				ip = v.IP
			case *net.IPAddr:
				ip = v.IP
			}
			ipStr := ip.String()
			// Skip non global unicast addresses
			if !ip.IsGlobalUnicast() || ip.IsUnspecified() {
				ip = nil
				continue
			}
			s.Debugf("  ip=%s", ipStr)
			ips = append(ips, ipStr)
			if !all {
				// NOTE(review): this break only exits the inner address
				// loop, not the interface loop — with all==false, up to
				// one IP per interface can be collected rather than a
				// single IP overall. Confirm whether this is intended.
				break
			}
		}
	}
	return true, ips, nil
}
|
|
|
|
// if the ip is not specified, attempt to resolve it
|
|
func resolveHostPorts(addr net.Listener) []string {
|
|
hostPorts := make([]string, 0)
|
|
hp := addr.Addr().(*net.TCPAddr)
|
|
port := strconv.Itoa(hp.Port)
|
|
if hp.IP.IsUnspecified() {
|
|
var ip net.IP
|
|
ifaces, _ := net.Interfaces()
|
|
for _, i := range ifaces {
|
|
addrs, _ := i.Addrs()
|
|
for _, addr := range addrs {
|
|
switch v := addr.(type) {
|
|
case *net.IPNet:
|
|
ip = v.IP
|
|
hostPorts = append(hostPorts, net.JoinHostPort(ip.String(), port))
|
|
case *net.IPAddr:
|
|
ip = v.IP
|
|
hostPorts = append(hostPorts, net.JoinHostPort(ip.String(), port))
|
|
default:
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
hostPorts = append(hostPorts, net.JoinHostPort(hp.IP.String(), port))
|
|
}
|
|
return hostPorts
|
|
}
|
|
|
|
// format the address of a net.Listener with a protocol
|
|
func formatURL(protocol string, addr net.Listener) []string {
|
|
hostports := resolveHostPorts(addr)
|
|
for i, hp := range hostports {
|
|
hostports[i] = fmt.Sprintf("%s://%s", protocol, hp)
|
|
}
|
|
return hostports
|
|
}
|
|
|
|
// Ports describes URLs that the server can be contacted in
type Ports struct {
	// Nats holds client connection URLs.
	Nats []string `json:"nats,omitempty"`
	// Monitoring holds HTTP/HTTPS monitoring URLs.
	Monitoring []string `json:"monitoring,omitempty"`
	// Cluster holds route (cluster) URLs.
	Cluster []string `json:"cluster,omitempty"`
	// Profile holds profiler URLs.
	Profile []string `json:"profile,omitempty"`
	// WebSocket holds websocket URLs.
	WebSocket []string `json:"websocket,omitempty"`
}
|
|
|
|
// PortsInfo attempts to resolve all the ports. If after maxWait the ports are not
// resolved, it returns nil. Otherwise it returns a Ports struct
// describing ports where the server can be contacted
func (s *Server) PortsInfo(maxWait time.Duration) *Ports {
	if s.readyForListeners(maxWait) {
		opts := s.getOpts()

		// Snapshot listeners and TLS flags under the lock.
		s.mu.Lock()
		tls := s.info.TLSRequired
		listener := s.listener
		httpListener := s.http
		clusterListener := s.routeListener
		profileListener := s.profiler
		wsListener := s.websocket.listener
		wss := s.websocket.tls
		s.mu.Unlock()

		ports := Ports{}

		if listener != nil {
			natsProto := "nats"
			if tls {
				natsProto = "tls"
			}
			ports.Nats = formatURL(natsProto, listener)
		}

		if httpListener != nil {
			monProto := "http"
			if opts.HTTPSPort != 0 {
				monProto = "https"
			}
			ports.Monitoring = formatURL(monProto, httpListener)
		}

		if clusterListener != nil {
			clusterProto := "nats"
			if opts.Cluster.TLSConfig != nil {
				clusterProto = "tls"
			}
			ports.Cluster = formatURL(clusterProto, clusterListener)
		}

		if profileListener != nil {
			ports.Profile = formatURL("http", profileListener)
		}

		if wsListener != nil {
			protocol := wsSchemePrefix
			if wss {
				protocol = wsSchemePrefixTLS
			}
			ports.WebSocket = formatURL(protocol, wsListener)
		}

		return &ports
	}

	// Listeners did not come up within maxWait.
	return nil
}
|
|
|
|
// Returns the portsFile. If a non-empty dirHint is provided, the dirHint
|
|
// path is used instead of the server option value
|
|
func (s *Server) portFile(dirHint string) string {
|
|
dirname := s.getOpts().PortsFileDir
|
|
if dirHint != "" {
|
|
dirname = dirHint
|
|
}
|
|
if dirname == _EMPTY_ {
|
|
return _EMPTY_
|
|
}
|
|
return filepath.Join(dirname, fmt.Sprintf("%s_%d.ports", filepath.Base(os.Args[0]), os.Getpid()))
|
|
}
|
|
|
|
// Delete the ports file. If a non-empty dirHint is provided, the dirHint
|
|
// path is used instead of the server option value
|
|
func (s *Server) deletePortsFile(hintDir string) {
|
|
portsFile := s.portFile(hintDir)
|
|
if portsFile != "" {
|
|
if err := os.Remove(portsFile); err != nil {
|
|
s.Errorf("Error cleaning up ports file %s: %v", portsFile, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Writes a file with a serialized Ports to the specified ports_file_dir.
|
|
// The name of the file is `exename_pid.ports`, typically nats-server_pid.ports.
|
|
// if ports file is not set, this function has no effect
|
|
func (s *Server) logPorts() {
|
|
opts := s.getOpts()
|
|
portsFile := s.portFile(opts.PortsFileDir)
|
|
if portsFile != _EMPTY_ {
|
|
go func() {
|
|
info := s.PortsInfo(5 * time.Second)
|
|
if info == nil {
|
|
s.Errorf("Unable to resolve the ports in the specified time")
|
|
return
|
|
}
|
|
data, err := json.Marshal(info)
|
|
if err != nil {
|
|
s.Errorf("Error marshaling ports file: %v", err)
|
|
return
|
|
}
|
|
if err := ioutil.WriteFile(portsFile, data, 0666); err != nil {
|
|
s.Errorf("Error writing ports file (%s): %v", portsFile, err)
|
|
return
|
|
}
|
|
|
|
}()
|
|
}
|
|
}
|
|
|
|
// waits until a calculated list of listeners is resolved or a timeout
|
|
func (s *Server) readyForListeners(dur time.Duration) bool {
|
|
end := time.Now().Add(dur)
|
|
for time.Now().Before(end) {
|
|
s.mu.Lock()
|
|
listeners := s.serviceListeners()
|
|
s.mu.Unlock()
|
|
if len(listeners) == 0 {
|
|
return false
|
|
}
|
|
|
|
ok := true
|
|
for _, l := range listeners {
|
|
if l == nil {
|
|
ok = false
|
|
break
|
|
}
|
|
}
|
|
if ok {
|
|
return true
|
|
}
|
|
select {
|
|
case <-s.quitCh:
|
|
return false
|
|
case <-time.After(25 * time.Millisecond):
|
|
// continue - unable to select from quit - we are still running
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// returns a list of listeners that are intended for the process
|
|
// if the entry is nil, the interface is yet to be resolved
|
|
func (s *Server) serviceListeners() []net.Listener {
|
|
listeners := make([]net.Listener, 0)
|
|
opts := s.getOpts()
|
|
listeners = append(listeners, s.listener)
|
|
if opts.Cluster.Port != 0 {
|
|
listeners = append(listeners, s.routeListener)
|
|
}
|
|
if opts.HTTPPort != 0 || opts.HTTPSPort != 0 {
|
|
listeners = append(listeners, s.http)
|
|
}
|
|
if opts.ProfPort != 0 {
|
|
listeners = append(listeners, s.profiler)
|
|
}
|
|
if opts.Websocket.Port != 0 {
|
|
listeners = append(listeners, s.websocket.listener)
|
|
}
|
|
return listeners
|
|
}
|
|
|
|
// Returns true if in lame duck mode.
// The server lock guards s.ldm, which is set by lameDuckMode().
func (s *Server) isLameDuckMode() bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	return s.ldm
}
|
|
|
|
// This function will close the client listener then close the clients
// at some interval to avoid a reconnecting storm.
func (s *Server) lameDuckMode() {
	s.mu.Lock()
	// Check if there is actually anything to do
	if s.shutdown || s.ldm || s.listener == nil {
		s.mu.Unlock()
		return
	}
	s.Noticef("Entering lame duck mode, stop accepting new clients")
	s.ldm = true
	// Number of accept loops that will signal on ldmCh once they exit.
	expected := 1
	s.listener.Close()
	s.listener = nil
	if s.websocket.server != nil {
		expected++
		s.websocket.server.Close()
		s.websocket.server = nil
		s.websocket.listener = nil
	}
	s.ldmCh = make(chan bool, expected)
	opts := s.getOpts()
	gp := opts.LameDuckGracePeriod
	// For tests, we want the grace period to be in some cases bigger
	// than the ldm duration, so to by-pass the validateOptions() check,
	// we use negative number and flip it here.
	if gp < 0 {
		gp *= -1
	}
	s.mu.Unlock()

	// If we are running any raftNodes transfer leaders.
	if hadTransfers := s.transferRaftLeaders(); hadTransfers {
		// They will transfer leadership quickly, but wait here for a second.
		select {
		case <-time.After(time.Second):
		case <-s.quitCh:
			return
		}
	}

	// Wait for accept loops to be done to make sure that no new
	// client can connect
	for i := 0; i < expected; i++ {
		<-s.ldmCh
	}

	s.mu.Lock()
	// Need to recheck few things
	if s.shutdown || len(s.clients) == 0 {
		s.mu.Unlock()
		// If there is no client, we need to call Shutdown() to complete
		// the LDMode. If server has been shutdown while lock was released,
		// calling Shutdown() should be no-op.
		s.Shutdown()
		return
	}
	// Spread the client closes over the LD duration minus the grace period.
	dur := int64(opts.LameDuckDuration)
	dur -= int64(gp)
	if dur <= 0 {
		dur = int64(time.Second)
	}
	numClients := int64(len(s.clients))
	batch := 1
	// Sleep interval between each client connection close.
	si := dur / numClients
	if si < 1 {
		// Should not happen (except in test with very small LD duration), but
		// if there are too many clients, batch the number of close and
		// use a tiny sleep interval that will result in yield likely.
		si = 1
		batch = int(numClients / dur)
	} else if si > int64(time.Second) {
		// Conversely, there is no need to sleep too long between clients
		// and spread say 10 clients for the 2min duration. Sleeping no
		// more than 1sec.
		si = int64(time.Second)
	}

	// Now capture all clients
	clients := make([]*client, 0, len(s.clients))
	for _, client := range s.clients {
		clients = append(clients, client)
	}
	// Now that we know that no new client can be accepted,
	// send INFO to routes and clients to notify this state.
	s.sendLDMToRoutes()
	s.sendLDMToClients()
	s.mu.Unlock()

	t := time.NewTimer(gp)
	// Delay start of closing of client connections in case
	// we have several servers that we want to signal to enter LD mode
	// and not have their client reconnect to each other.
	select {
	case <-t.C:
		s.Noticef("Closing existing clients")
	case <-s.quitCh:
		t.Stop()
		return
	}
	for i, client := range clients {
		client.closeConnection(ServerShutdown)
		if i == len(clients)-1 {
			break
		}
		if batch == 1 || i%batch == 0 {
			// We pick a random interval which will be at least si/2
			v := rand.Int63n(si)
			if v < si/2 {
				v = si / 2
			}
			// Reuse the timer instead of allocating a new one per iteration.
			t.Reset(time.Duration(v))
			// Sleep for given interval or bail out if kicked by Shutdown().
			select {
			case <-t.C:
			case <-s.quitCh:
				t.Stop()
				return
			}
		}
	}
	s.Shutdown()
}
|
|
|
|
// Send an INFO update to routes with the indication that this server is in LDM mode.
|
|
// Server lock is held on entry.
|
|
func (s *Server) sendLDMToRoutes() {
|
|
s.routeInfo.LameDuckMode = true
|
|
s.generateRouteInfoJSON()
|
|
for _, r := range s.routes {
|
|
r.mu.Lock()
|
|
r.enqueueProto(s.routeInfoJSON)
|
|
r.mu.Unlock()
|
|
}
|
|
// Clear now so that we notify only once, should we have to send other INFOs.
|
|
s.routeInfo.LameDuckMode = false
|
|
}
|
|
|
|
// Send an INFO update to clients with the indication that this server is in
// LDM mode and with only URLs of other nodes.
// Server lock is held on entry.
func (s *Server) sendLDMToClients() {
	s.info.LameDuckMode = true
	// Clear this so that if there are further updates, we don't send our URLs.
	s.clientConnectURLs = s.clientConnectURLs[:0]
	if s.websocket.connectURLs != nil {
		s.websocket.connectURLs = s.websocket.connectURLs[:0]
	}
	// Reset content first.
	s.info.ClientConnectURLs = s.info.ClientConnectURLs[:0]
	s.info.WSConnectURLs = s.info.WSConnectURLs[:0]
	// Only add the other nodes if we are allowed to.
	// (The maps hold the URLs known from the rest of the cluster.)
	if !s.getOpts().Cluster.NoAdvertise {
		for url := range s.clientConnectURLsMap {
			s.info.ClientConnectURLs = append(s.info.ClientConnectURLs, url)
		}
		for url := range s.websocket.connectURLsMap {
			s.info.WSConnectURLs = append(s.info.WSConnectURLs, url)
		}
	}
	// Send to all registered clients that support async INFO protocols.
	s.sendAsyncInfoToClients(true, true)
	// We now clear the info.LameDuckMode flag so that if there are
	// cluster updates and we send the INFO, we don't have the boolean
	// set which would cause multiple LDM notifications to clients.
	s.info.LameDuckMode = false
}
|
|
|
|
// If given error is a net.Error and is temporary, sleeps for the given
|
|
// delay and double it, but cap it to ACCEPT_MAX_SLEEP. The sleep is
|
|
// interrupted if the server is shutdown.
|
|
// An error message is displayed depending on the type of error.
|
|
// Returns the new (or unchanged) delay, or a negative value if the
|
|
// server has been or is being shutdown.
|
|
func (s *Server) acceptError(acceptName string, err error, tmpDelay time.Duration) time.Duration {
|
|
if !s.isRunning() {
|
|
return -1
|
|
}
|
|
if ne, ok := err.(net.Error); ok && ne.Temporary() {
|
|
s.Errorf("Temporary %s Accept Error(%v), sleeping %dms", acceptName, ne, tmpDelay/time.Millisecond)
|
|
select {
|
|
case <-time.After(tmpDelay):
|
|
case <-s.quitCh:
|
|
return -1
|
|
}
|
|
tmpDelay *= 2
|
|
if tmpDelay > ACCEPT_MAX_SLEEP {
|
|
tmpDelay = ACCEPT_MAX_SLEEP
|
|
}
|
|
} else {
|
|
s.Errorf("%s Accept error: %v", acceptName, err)
|
|
}
|
|
return tmpDelay
|
|
}
|
|
|
|
// errNoIPAvail is returned by getRandomIP when every resolved IP for the
// host is in the excluded-addresses set.
var errNoIPAvail = errors.New("no IP available")
|
|
|
|
func (s *Server) getRandomIP(resolver netResolver, url string, excludedAddresses map[string]struct{}) (string, error) {
|
|
host, port, err := net.SplitHostPort(url)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
// If already an IP, skip.
|
|
if net.ParseIP(host) != nil {
|
|
return url, nil
|
|
}
|
|
ips, err := resolver.LookupHost(context.Background(), host)
|
|
if err != nil {
|
|
return "", fmt.Errorf("lookup for host %q: %v", host, err)
|
|
}
|
|
if len(excludedAddresses) > 0 {
|
|
for i := 0; i < len(ips); i++ {
|
|
ip := ips[i]
|
|
addr := net.JoinHostPort(ip, port)
|
|
if _, excluded := excludedAddresses[addr]; excluded {
|
|
if len(ips) == 1 {
|
|
ips = nil
|
|
break
|
|
}
|
|
ips[i] = ips[len(ips)-1]
|
|
ips = ips[:len(ips)-1]
|
|
i--
|
|
}
|
|
}
|
|
if len(ips) == 0 {
|
|
return "", errNoIPAvail
|
|
}
|
|
}
|
|
var address string
|
|
if len(ips) == 0 {
|
|
s.Warnf("Unable to get IP for %s, will try with %s: %v", host, url, err)
|
|
address = url
|
|
} else {
|
|
var ip string
|
|
if len(ips) == 1 {
|
|
ip = ips[0]
|
|
} else {
|
|
ip = ips[rand.Int31n(int32(len(ips)))]
|
|
}
|
|
// add the port
|
|
address = net.JoinHostPort(ip, port)
|
|
}
|
|
return address, nil
|
|
}
|
|
|
|
// Returns true for the first attempt and depending on the nature
|
|
// of the attempt (first connect or a reconnect), when the number
|
|
// of attempts is equal to the configured report attempts.
|
|
func (s *Server) shouldReportConnectErr(firstConnect bool, attempts int) bool {
|
|
opts := s.getOpts()
|
|
if firstConnect {
|
|
if attempts == 1 || attempts%opts.ConnectErrorReports == 0 {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
if attempts == 1 || attempts%opts.ReconnectErrorReports == 0 {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Invoked for route, leaf and gateway connections. Set the very first
|
|
// PING to a lower interval to capture the initial RTT.
|
|
// After that the PING interval will be set to the user defined value.
|
|
// Client lock should be held.
|
|
func (s *Server) setFirstPingTimer(c *client) {
|
|
opts := s.getOpts()
|
|
d := opts.PingInterval
|
|
|
|
if !opts.DisableShortFirstPing {
|
|
if c.kind != CLIENT {
|
|
if d > firstPingInterval {
|
|
d = firstPingInterval
|
|
}
|
|
if c.kind == GATEWAY {
|
|
d = adjustPingIntervalForGateway(d)
|
|
}
|
|
} else if d > firstClientPingInterval {
|
|
d = firstClientPingInterval
|
|
}
|
|
}
|
|
// We randomize the first one by an offset up to 20%, e.g. 2m ~= max 24s.
|
|
addDelay := rand.Int63n(int64(d / 5))
|
|
d += time.Duration(addDelay)
|
|
c.ping.tmr = time.AfterFunc(d, c.processPingTimer)
|
|
}
|
|
|
|
// Forwards a subscription interest update for the given account to the
// routes, to gateways (when gateway support is enabled), and to leaf nodes.
func (s *Server) updateRemoteSubscription(acc *Account, sub *subscription, delta int32) {
	s.updateRouteSubscriptionMap(acc, sub, delta)
	if s.gateway.enabled {
		s.gatewayUpdateSubInterest(acc.Name, sub, delta)
	}

	s.updateLeafNodes(acc, sub, delta)
}
|
|
|
|
// Creates the server's IP queue logger and starts its run loop in a
// server-tracked goroutine. The message channel is buffered (128) so that
// log() can drop messages instead of blocking its callers when it is full.
func (s *Server) startIPQLogger() {
	s.ipqLog = &srvIPQueueLogger{
		ch:   make(chan string, 128),
		done: make(chan struct{}),
		s:    s,
	}
	s.startGoRoutine(s.ipqLog.run)
}
|
|
|
|
// stop signals the logger's run loop to exit by closing the done channel.
func (l *srvIPQueueLogger) stop() {
	close(l.done)
}
|
|
|
|
func (l *srvIPQueueLogger) log(name string, pending int) {
|
|
select {
|
|
case l.ch <- fmt.Sprintf("%s queue pending size: %v", name, pending):
|
|
default:
|
|
}
|
|
}
|
|
|
|
// run is the logger goroutine: it forwards queued messages as server
// warnings until stop() closes the done channel. It releases the server's
// goroutine WaitGroup on exit.
func (l *srvIPQueueLogger) run() {
	defer l.s.grWG.Done()
	for {
		select {
		case w := <-l.ch:
			l.s.Warnf("%s", w)
		case <-l.done:
			return
		}
	}
}
|