From b44dd491e8ace80d6233ab86d85ac47155f96c7d Mon Sep 17 00:00:00 2001 From: Colin Sullivan Date: Sat, 6 Feb 2021 11:15:16 -0700 Subject: [PATCH 1/2] Add lame duck mode documentation. Signed-off-by: Colin Sullivan --- SUMMARY.md | 1 + nats-server/nats_admin/README.md | 1 + nats-server/nats_admin/lame_duck_mode.md | 28 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 nats-server/nats_admin/lame_duck_mode.md diff --git a/SUMMARY.md b/SUMMARY.md index 39a8193..ac9fbae 100644 --- a/SUMMARY.md +++ b/SUMMARY.md @@ -102,6 +102,7 @@ * [Upgrading a Cluster](nats-server/nats_admin/upgrading_cluster.md) * [Slow Consumers](nats-server/nats_admin/slow_consumers.md) * [Signals](nats-server/nats_admin/signals.md) + * [Lame Duck Mode](nats-server/nats_admin/lame_duck_mode.md) * [NATS and Docker](nats-server/nats_docker/README.md) * [Tutorial](nats-server/nats_docker/nats-docker-tutorial.md) * [Docker Swarm](nats-server/nats_docker/docker_swarm.md) diff --git a/nats-server/nats_admin/README.md b/nats-server/nats_admin/README.md index 4fc1a05..ef96055 100644 --- a/nats-server/nats_admin/README.md +++ b/nats-server/nats_admin/README.md @@ -8,4 +8,5 @@ Managing a NATS server is simple, typical lifecycle operations include: * Monitoring the server via: * The monitoring [endpoint](../configuration/monitoring.md) and tools like [nats-top](../../nats-tools/nats_top/) * By subscribing to [system events](../configuration/sys_accounts/) +* Gracefully shut down a server with [Lame Duck Mode](lame_duck_mode.md) diff --git a/nats-server/nats_admin/lame_duck_mode.md b/nats-server/nats_admin/lame_duck_mode.md new file mode 100644 index 0000000..dd9438b --- /dev/null +++ b/nats-server/nats_admin/lame_duck_mode.md @@ -0,0 +1,28 @@ +# Lame Duck Mode + +In production we recommend that a server is shut down with lame duck mode +as a graceful way to slowly evict clients. 
With large deployments, this +mitigates the "thundering herd" situation that will place CPU pressure on +servers as TLS-enabled clients reconnect. + +## Server + +Lame duck mode is initiated by signaling the server: + +```text +nats-server --signal ldm +``` + +After entering lame duck mode, the server will stop accepting new connections, +wait for a 10-second grace period, then begin to evict clients over a period of time +configurable by the [lame_duck_duration](https://docs.nats.io/nats-server/configuration#runtime-configuration) +configuration option. This period defaults to 2 minutes. + +## Clients + +When entering lame duck mode, the server will send a message to clients. Some +maintainer-supported clients will invoke an optional callback indicating that +a server is entering lame duck mode. This is used for cases where an application +can benefit from preparing for the short outage between the time it is evicted and +automatically reconnected to another server. + From 9c95e9012c270651ac17aee159ecfe4e2bad1547 Mon Sep 17 00:00:00 2001 From: Matthias Hanel Date: Mon, 8 Feb 2021 18:18:30 -0500 Subject: [PATCH 2/2] [added] description for accountz endpoint Signed-off-by: Matthias Hanel --- nats-server/configuration/monitoring.md | 106 ++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/nats-server/configuration/monitoring.md b/nats-server/configuration/monitoring.md index 0416bad..8d40aa7 100644 --- a/nats-server/configuration/monitoring.md +++ b/nats-server/configuration/monitoring.md @@ -10,6 +10,7 @@ To monitor the NATS messaging system, `nats-server` provides a lightweight HTTP * [Gateways](monitoring.md#gateway-information) * [Leaf Nodes](monitoring.md#leaf-nodes-information) * [Subscription Routing](monitoring.md#subscription-routing-information) +* [Account Information](monitoring.md#account-information) All endpoints return a JSON object. 
@@ -520,6 +521,111 @@ The `/subsz` endpoint reports detailed information about the current subscriptio } ``` +### Account Information + +The `/accountz` endpoint reports information on a server's active accounts. +The default behavior is to return a list of all accounts known to the server. + +**Endpoint:** `http://server:port/accountz` + +| Result | Return Code | +| :--- | :--- | +| Success | 200 \(OK\) | +| Error | 400 \(Bad Request\) | + +#### Arguments + +| Argument | Values | Description | +| :--- | :--- | :--- | +| acc | account name | Include metrics for the specified account. Default is empty. When not set, a list of all accounts is included. | + +#### Example + +* Get list of all accounts: [http://demo.nats.io:8222/accountz](http://demo.nats.io:8222/accountz) +* Get details for specific account `$G`: [http://demo.nats.io:8222/accountz?acc=\$G](http://demo.nats.io:8222/accountz?acc=$G) + +#### Response + +Default behavior: + +```javascript +{ + "server_id": "NAB2EEQ3DLS2BHU4K2YMXMPIOOOAOFOAQAC5NQRIEUI4BHZKFBI4ZU4A", + "now": "2021-02-08T17:31:29.551146-05:00", + "system_account": "AAAXAUVSGK7TCRHFIRAS4SYXVJ76EWDMNXZM6ARFGXP7BASNDGLKU7A5", + "accounts": [ + "AAAXAUVSGK7TCRHFIRAS4SYXVJ76EWDMNXZM6ARFGXP7BASNDGLKU7A5", + "$G" + ] +} +``` + +Retrieve specific account: + +```javascript +{ + "server_id": "NAB2EEQ3DLS2BHU4K2YMXMPIOOOAOFOAQAC5NQRIEUI4BHZKFBI4ZU4A", + "now": "2021-02-08T17:37:55.80856-05:00", + "system_account": "AAAXAUVSGK7TCRHFIRAS4SYXVJ76EWDMNXZM6ARFGXP7BASNDGLKU7A5", + "account_detail": { + "account_name": "AAAXAUVSGK7TCRHFIRAS4SYXVJ76EWDMNXZM6ARFGXP7BASNDGLKU7A5", + "update_time": "2021-02-08T17:31:22.390334-05:00", + "is_system": true, + "expired": false, + "complete": true, + "jetstream_enabled": false, + "leafnode_connections": 0, + "client_connections": 0, + "subscriptions": 42, + "exports": [ + { + "subject": "$SYS.DEBUG.SUBSCRIBERS", + "type": "service", + "response_type": "Singleton" + } + ], + "jwt": 
"eyJ0eXAiOiJqd3QiLCJhbGciOiJlZDI1NTE5In0.eyJqdGkiOiJVVlU2VEpXRU8zS0hYWTZVMkgzM0RCVklET1A3U05DTkJPMlM0M1dPNUM2T1RTTDNVSUxBIiwiaWF0IjoxNjAzNDczNzg4LCJpc3MiOiJPQlU1TzVGSjMyNFVEUFJCSVZSR0Y3Q05FT0hHTFBTN0VZUEJUVlFaS1NCSElJWklCNkhENjZKRiIsIm5hbWUiOiJTWVMiLCJzdWIiOiJBQUFYQVVWU0dLN1RDUkhGSVJBUzRTWVhWSjc2RVdETU5YWk02QVJGR1hQN0JBU05ER0xLVTdBNSIsInR5cGUiOiJhY2NvdW50IiwibmF0cyI6eyJsaW1pdHMiOnsic3VicyI6LTEsImNvbm4iOi0xLCJsZWFmIjotMSwiaW1wb3J0cyI6LTEsImV4cG9ydHMiOi0xLCJkYXRhIjotMSwicGF5bG9hZCI6LTEsIndpbGRjYXJkcyI6dHJ1ZX19fQ.CeGo16i5oD0b1uBJ8UdGmLH-l9dL8yNqXHggkAt2T5c88fM7k4G08wLguMAnlvzrdlYvdZvOx_5tHLuDZmGgCg", + "issuer_key": "OBU5O5FJ324UDPRBIVRGF7CNEOHGLPS7EYPBTVQZKSBHIIZIB6HD66JF", + "name_tag": "SYS", + "decoded_jwt": { + "jti": "UVU6TJWEO3KHXY6U2H33DBVIDOP7SNCNBO2S43WO5C6OTSL3UILA", + "iat": 1603473788, + "iss": "OBU5O5FJ324UDPRBIVRGF7CNEOHGLPS7EYPBTVQZKSBHIIZIB6HD66JF", + "name": "SYS", + "sub": "AAAXAUVSGK7TCRHFIRAS4SYXVJ76EWDMNXZM6ARFGXP7BASNDGLKU7A5", + "nats": { + "limits": { + "subs": -1, + "data": -1, + "payload": -1, + "imports": -1, + "exports": -1, + "wildcards": true, + "conn": -1, + "leaf": -1 + }, + "default_permissions": { + "pub": {}, + "sub": {} + }, + "type": "account", + "version": 1 + } + }, + "sublist_stats": { + "num_subscriptions": 42, + "num_cache": 6, + "num_inserts": 42, + "num_removes": 0, + "num_matches": 6, + "cache_hit_rate": 0, + "max_fanout": 1, + "avg_fanout": 0.8333333333333334 + } + } +} +``` + ## Creating Monitoring Applications NATS monitoring endpoints support [JSONP](https://en.wikipedia.org/wiki/JSONP) and [CORS](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing#How_CORS_works). You can easily create single page web applications for monitoring. To do this you simply pass the `callback` query parameter to any endpoint.