diff --git a/docs/configuration.md b/docs/configuration.md index e63db6b2d..2735e4e4a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -21,67 +21,75 @@ Property **envoy-control.server.snapshot-cleanup.collection-interval-millis** | How often the collection background action should run | 10s ## Snapshot properties -Property | Description | Default value --------------------------------------------------------------------------------------------------------------| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- -**envoy-control.envoy.snapshot.dynamic-listeners.enabled** | Enable or disable creating listeners using dynamic configuration | true -**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.time-format** | Time format for access logs | "%START_TIME(%FT%T.%3fZ)%" -**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.message-format** | Message format for access logs | "%PROTOCOL% %REQ(:METHOD)% %REQ(:authority)% %REQ(:PATH)% %DOWNSTREAM_REMOTE_ADDRESS% -> %UPSTREAM_HOST%" -**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.level** | Logging level for access logs | "TRACE" -**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.logger** | Logger name for access logs | "envoy.AccessLog" -**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.custom-fields** | Custom fields, which should be included in access logs | "empty map()" -**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.ingress-xff-num-trusted-hops** | Number of trusted hops for ingress filter (refer to [envoy docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_conn_man/headers.html?highlight=xff_num_trusted_hops#x-forwarded-for)) | 1 
-**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.enabled** | Enable or disable creating local reply mapper configuration (refer to [envoy docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_conn_man/local_reply)) | false -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.response-format.text-format** | Text message format with placeholders (refer to [envoy docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/access_log/usage#command-operators)) | "" -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.response-format.json-format** | JSON message format with placeholders for matched response (refer to [envoy docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/access_log/usage#command-operators)). | "" -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.response-format.content-type** | Response content-type header value | "" -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.status-code-matcher** | Matcher which handles specific status codes formatted as string e.g.: EQ:400 - equal to status code 400 | "" -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.header-matcher.name** | Header name to match | "" -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.header-matcher.exact-match** | Header value to match for specified header (only one of: exactMatch, regexMatch can be specified. If none is specified, header name presence matcher will be used) | "" -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.header-matcher.regex-match** | Header value regex to match for specified header (only one of: exactMatch, regexMatch can be specified. 
If none is specified, header name presence matcher will be used) | "" -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.response-flag-matcher** | Response flags to match (refer to [envoy docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/access_log/usage#command-operators)) | empty list -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.status-code-to-return** | Status code to return for matched response | 0 (disabled) -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.body-to-return** | Response message to return for matched response | "" -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.response-format.text-format** | Text message format with placeholders for matched response | "" -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.response-format.json-format** | JSON message format with placeholders for matched response | empty map -**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.response-format.content-type** | Response content-type header value | "" -**envoy-control.envoy.snapshot.eds-connection-timeout** | Connection timeout for EDS clusters | 2s -**envoy-control.envoy.snapshot.egress.common-http.idle-timeout** | Set idle timeout for all HTTP connections (HTTP/1 and HTTP/2) | 120s -**envoy-control.envoy.snapshot.egress.common-http.request-timeout** | Set request timeout for all routes (HTTP/1 and HTTP/2) | 120s -**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.max-connections** | The maximum number of connections that Envoy will make to the upstream cluster for high priority threshold. | 1024 -**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.max-pending-requests** | The maximum number of pending requests that Envoy will allow to the upstream cluster for high priority threshold. 
| 1024 -**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.max-requests** | The maximum number of parallel requests that Envoy will make to the upstream cluster for high priority threshold. | 1024 -**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.max-retries** | The maximum number of parallel retries that Envoy will allow to the upstream cluster for high priority threshold. | 3 -**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.max-connections** | The maximum number of connections that Envoy will make to the upstream cluster for default priority threshold. | 1024 -**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.max-pending-requests** | The maximum number of pending requests that Envoy will allow to the upstream cluster for default priority threshold. | 1024 -**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.max-requests** | The maximum number of parallel requests that Envoy will make to the upstream cluster for default priority threshold. | 1024 -**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.max-retries** | The maximum number of parallel retries that Envoy will allow to the upstream cluster for default priority threshold. | 3 -**envoy-control.envoy.snapshot.egress.never-remove-clusters** | Don't remove cluster, when corresponding service disappears from services source. Only remove all instances. 
| true -**envoy-control.envoy.snapshot.egress.cluster-not-found-status-code** | Status code when cluster is not found | 503 -**envoy-control.envoy.snapshot.egress.http2.enabled** | Enable http2 for clusters that use envoy | true -**envoy-control.envoy.snapshot.egress.http2.tag-name** | Tag to be used to identify if instance uses envoy | envoy -**envoy-control.envoy.snapshot.egress.handle-internal-redirect** | Handle redirects by Envoy | false -**envoy-control.envoy.snapshot.egress.host-header-rewriting.enabled** | Enable rewriting Host header with value from specified header | false -**envoy-control.envoy.snapshot.egress.host-header-rewriting.custom-host-header** | Header name which value will override Host header | "x-envoy-original-host" -**envoy-control.envoy.snapshot.egress.headers-to-remove** | List of headers to sanitize on egress | empty list -**envoy-control.envoy.snapshot.egress.domains** | List of domains added to service names for matching. Domain name has to start with '.' ( e.g.: .domain) | empty list -**envoy-control.envoy.snapshot.ingress.headers-to-remove** | List of headers to sanitize on ingress | empty list -**envoy-control.envoy.snapshot.local-service.idle-timeout** | Idle timeout between client to envoy | 60s -**envoy-control.envoy.snapshot.local-service.response-timeout** | Response timeout for localService | 15s -**envoy-control.envoy.snapshot.local-service.connection-idle-timeout** | Connection idle timeout for localService | 120s -**envoy-control.envoy.snapshot.routes.status.enabled** | Enable status route | false -**envoy-control.envoy.snapshot.routes.status.endpoints** | List of endpoints with path or prefix of status routes | /status -**envoy-control.envoy.snapshot.routes.status.create-virtual-cluster** | Create virtual cluster for status route | false -**envoy-control.envoy.snapshot.state-sample-duration** | Duration of state sampling (this is used to prevent surges in consul events overloading control plane) | 1s 
-**envoy-control.envoy.snapshot.xds-cluster-name** | Name of cluster for xDS operations | envoy-control-xds -**envoy-control.envoy.snapshot.enabled-communication-modes.ads** | Enable or disable support for ADS communication mode | true -**envoy-control.envoy.snapshot.enabled-communication-modes.xds** | Enable or disable support for XDS communication mode | true -**envoy-control.envoy.snapshot.should-send-missing-endpoints** | Enable sending missing Endpoints - when Envoy requests for not existing cluster in snapshot control-plane will respond with empty Endpoint definition | false -**envoy-control.envoy.snapshot.cluster-name** | Dynamic forward proxy cluster name | dynamic_forward_proxy_cluster -**envoy-control.envoy.snapshot.dns-lookup-family** | DNS lookup address family | V4_ONLY -**envoy-control.envoy.snapshot.max-cached-hosts** | The maximum number of hosts that the cache will hold | 1024 -**envoy-control.envoy.snapshot.max-host-ttl** | The TTL for hosts that are unused. Hosts that have not been used in the configured time interval will be purged | 300s -**envoy-control.envoy.snapshot.rate-limit.domain** | Domain name for ratelimit service. 
| rl -**envoy-control.envoy.snapshot.rate-limit.service-name** | ratelimit GRPC service name | ratelimit-grpc +Property | Description | Default value +----------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.envoy.snapshot.dynamic-listeners.enabled** | Enable or disable creating listeners using dynamic configuration | true +**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.time-format** | Time format for access logs | "%START_TIME(%FT%T.%3fZ)%" +**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.message-format** | Message format for access logs | "%PROTOCOL% %REQ(:METHOD)% %REQ(:authority)% %REQ(:PATH)% %DOWNSTREAM_REMOTE_ADDRESS% -> %UPSTREAM_HOST%" +**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.level** | Logging level for access logs | "TRACE" +**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.logger** | Logger name for access logs | "envoy.AccessLog" +**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.access-log.custom-fields** | Custom fields, which should be included in access logs | "empty map()" +**envoy-control.envoy.snapshot.dynamic-listeners.http-filters.ingress-xff-num-trusted-hops** | Number of trusted hops for ingress filter (refer to [envoy docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_conn_man/headers.html?highlight=xff_num_trusted_hops#x-forwarded-for)) | 1 +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.enabled** | Enable or disable creating local reply mapper configuration (refer to [envoy 
docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_conn_man/local_reply)) | false +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.response-format.text-format** | Text message format with placeholders (refer to [envoy docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/access_log/usage#command-operators)) | "" +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.response-format.json-format** | JSON message format with placeholders for matched response (refer to [envoy docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/access_log/usage#command-operators)). | "" +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.response-format.content-type** | Response content-type header value | "" +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.status-code-matcher** | Matcher which handles specific status codes formatted as string e.g.: EQ:400 - equal to status code 400 | "" +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.header-matcher.name** | Header name to match | "" +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.header-matcher.exact-match** | Header value to match for specified header (only one of: exactMatch, regexMatch can be specified. If none is specified, header name presence matcher will be used) | "" +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.header-matcher.regex-match** | Header value regex to match for specified header (only one of: exactMatch, regexMatch can be specified. 
If none is specified, header name presence matcher will be used) | "" +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.response-flag-matcher** | Response flags to match (refer to [envoy docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/access_log/usage#command-operators)) | empty list +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.status-code-to-return** | Status code to return for matched response | 0 (disabled) +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.body-to-return** | Response message to return for matched response | "" +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.response-format.text-format** | Text message format with placeholders for matched response | "" +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.response-format.json-format** | JSON message format with placeholders for matched response | empty map +**envoy-control.envoy.snapshot.dynamic-listeners.local-reply-mapper.matchers.response-format.content-type** | Response content-type header value | "" +**envoy-control.envoy.snapshot.eds-connection-timeout** | Connection timeout for EDS clusters | 2s +**envoy-control.envoy.snapshot.egress.common-http.idle-timeout** | Set idle timeout for all HTTP connections (HTTP/1 and HTTP/2) | 120s +**envoy-control.envoy.snapshot.egress.common-http.request-timeout** | Set request timeout for all routes (HTTP/1 and HTTP/2) | 120s +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.max-connections** | The maximum number of connections that Envoy will make to the upstream cluster for high priority threshold. | 1024 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.max-connection-pools** | The maximum number of connection pools that Envoy will make to the upstream cluster for high priority threshold. 
| null (unlimited) +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.track-remaining** | If track-remaining is true, then stats that expose the number of resources remaining until the circuit breakers open will be published (for high priority threshold). | false +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.max-pending-requests** | The maximum number of pending requests that Envoy will allow to the upstream cluster for high priority threshold. | 1024 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.max-requests** | The maximum number of parallel requests that Envoy will make to the upstream cluster for high priority threshold. | 1024 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.max-retries** | The maximum number of parallel retries that Envoy will allow to the upstream cluster for high priority threshold. (overridden by retry budget when the retry budget is not null) | 3 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.retry-budget.budget-percent** | Specifies the limit on concurrent retries as a percentage of the sum of active requests and active pending requests for high priority threshold. | 20.0 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.high-threshold.retry-budget.min-retry-concurrency** | The limit on the number of active retries may never go below this number for high priority threshold | 3 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.max-connections** | The maximum number of connections that Envoy will make to the upstream cluster for default priority threshold. | 1024 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.max-connection-pools** | The maximum number of connection pools that Envoy will make to the upstream cluster for default priority threshold. 
| null (unlimited) +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.track-remaining** | If track-remaining is true, then stats that expose the number of resources remaining until the circuit breakers open will be published (for default priority threshold). | false +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.max-pending-requests** | The maximum number of pending requests that Envoy will allow to the upstream cluster for default priority threshold. | 1024 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.max-requests** | The maximum number of parallel requests that Envoy will make to the upstream cluster for default priority threshold. | 1024 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.max-retries** | The maximum number of parallel retries that Envoy will allow to the upstream cluster for default priority threshold. (overridden by retry budget when the retry budget is not null) | 3 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.retry-budget.budget-percent** | Specifies the limit on concurrent retries as a percentage of the sum of active requests and active pending requests for default priority threshold. | 20.0 +**envoy-control.envoy.snapshot.egress.common-http.circuit-breakers.default-threshold.retry-budget.min-retry-concurrency** | The limit on the number of active retries may never go below this number for default priority threshold | 3 +**envoy-control.envoy.snapshot.egress.never-remove-clusters** | Don't remove cluster, when corresponding service disappears from services source. Only remove all instances. 
| true +**envoy-control.envoy.snapshot.egress.cluster-not-found-status-code** | Status code when cluster is not found | 503 +**envoy-control.envoy.snapshot.egress.http2.enabled** | Enable http2 for clusters that use envoy | true +**envoy-control.envoy.snapshot.egress.http2.tag-name** | Tag to be used to identify if instance uses envoy | envoy +**envoy-control.envoy.snapshot.egress.handle-internal-redirect** | Handle redirects by Envoy | false +**envoy-control.envoy.snapshot.egress.host-header-rewriting.enabled** | Enable rewriting Host header with value from specified header | false +**envoy-control.envoy.snapshot.egress.host-header-rewriting.custom-host-header** | Header name which value will override Host header | "x-envoy-original-host" +**envoy-control.envoy.snapshot.egress.headers-to-remove** | List of headers to sanitize on egress | empty list +**envoy-control.envoy.snapshot.egress.domains** | List of domains added to service names for matching. Domain name has to start with '.' ( e.g.: .domain) | empty list +**envoy-control.envoy.snapshot.ingress.headers-to-remove** | List of headers to sanitize on ingress | empty list +**envoy-control.envoy.snapshot.local-service.idle-timeout** | Idle timeout between client to envoy | 60s +**envoy-control.envoy.snapshot.local-service.response-timeout** | Response timeout for localService | 15s +**envoy-control.envoy.snapshot.local-service.connection-idle-timeout** | Connection idle timeout for localService | 120s +**envoy-control.envoy.snapshot.routes.status.enabled** | Enable status route | false +**envoy-control.envoy.snapshot.routes.status.endpoints** | List of endpoints with path or prefix of status routes | /status +**envoy-control.envoy.snapshot.routes.status.create-virtual-cluster** | Create virtual cluster for status route | false +**envoy-control.envoy.snapshot.state-sample-duration** | Duration of state sampling (this is used to prevent surges in consul events overloading control plane) | 1s 
+**envoy-control.envoy.snapshot.xds-cluster-name** | Name of cluster for xDS operations | envoy-control-xds +**envoy-control.envoy.snapshot.enabled-communication-modes.ads** | Enable or disable support for ADS communication mode | true +**envoy-control.envoy.snapshot.enabled-communication-modes.xds** | Enable or disable support for XDS communication mode | true +**envoy-control.envoy.snapshot.should-send-missing-endpoints** | Enable sending missing Endpoints - when Envoy requests for not existing cluster in snapshot control-plane will respond with empty Endpoint definition | false +**envoy-control.envoy.snapshot.cluster-name** | Dynamic forward proxy cluster name | dynamic_forward_proxy_cluster +**envoy-control.envoy.snapshot.dns-lookup-family** | DNS lookup address family | V4_ONLY +**envoy-control.envoy.snapshot.max-cached-hosts** | The maximum number of hosts that the cache will hold | 1024 +**envoy-control.envoy.snapshot.max-host-ttl** | The TTL for hosts that are unused. Hosts that have not been used in the configured time interval will be purged | 300s +**envoy-control.envoy.snapshot.rate-limit.domain** | Domain name for ratelimit service. 
| rl +**envoy-control.envoy.snapshot.rate-limit.service-name** | ratelimit GRPC service name | ratelimit-grpc ## Permissions @@ -171,13 +179,13 @@ Where `` is one of the following: * `default` - default retry policy, applied for every request that doesn't match more specific selector ### Outgoing traffic -Property | Description | Default value ---------------------------------------------------------------------------------------------------------| ----------------------------------------------------------------------------------------------------------------------------- | --------- -**envoy-control.envoy.snapshot.retryPolicy.numberOfRetries** | Number of retries | 1 -**envoy-control.envoy.snapshot.retryPolicy.hostSelectionRetryMaxAttempts** | The maximum number of times host selection will be reattempted before request being routed to last selected host | 3 -**envoy-control.envoy.snapshot.retryPolicy.retryHostPredicate** | Specifies a collection of RetryHostPredicates that will be consulted when selecting a host for retries | a list with one entry "envoy.retry_host_predicates.previous_hosts" -**envoy-control.envoy.snapshot.retryPolicy.retryBackOff.baseInterval** | Specifies parameters that control exponential retry back off base interval | 25ms -**envoy-control.envoy.snapshot.retryPolicy.retryBackOff.maxInterval** | Specifies parameters that control exponential retry back off max interval | 10 times base interval +Property | Description | Default value +-------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.envoy.snapshot.egress.retryPolicy.numberOfRetries** | Number of retries | 1 +**envoy-control.envoy.snapshot.egress.retryPolicy.hostSelectionRetryMaxAttempts** | The maximum number of times host selection will be reattempted before request being routed 
to last selected host | 3 +**envoy-control.envoy.snapshot.egress.retryPolicy.retryHostPredicate** | Specifies a collection of RetryHostPredicates that will be consulted when selecting a host for retries | a list with one entry "envoy.retry_host_predicates.previous_hosts" +**envoy-control.envoy.snapshot.egress.retryPolicy.retryBackOff.baseInterval** | Specifies parameters that control exponential retry back off base interval | 25ms +**envoy-control.envoy.snapshot.egress.retryPolicy.retryBackOff.maxInterval** | Specifies parameters that control exponential retry back off max interval | 10 times base interval ## Metrics Property | Description | Default value diff --git a/docs/ec_vs_other_software.md b/docs/ec_vs_other_software.md index cd05186cc..f33608ee8 100644 --- a/docs/ec_vs_other_software.md +++ b/docs/ec_vs_other_software.md @@ -16,7 +16,7 @@ this job, because of the JRE runtime. This means higher memory footprint and lat Linkerd v2 was rewritten in Rust to get better performance. Unfortunately, just like Istio - it's Kubernetes only. ### Consul Connect -[Consul Connect](https://www.consul.io/docs/connect/index.html) is a simple way to deploy Envoy to current +[Consul Connect](https://www.consul.io/docs/connect) is a simple way to deploy Envoy to current Consul based infrastructure. The problem with Consul Connect is that versions prior to 1.6.0 had very limited traffic control capabilities. We want to have a fallback to instances from other DCs, canary deployment and other features specific to our @@ -35,4 +35,4 @@ Control Plane implementation on. They're not a sufficient Control Plane by thems Discovery Service. Envoy Control is based on Java Control Plane and integrates with Consul by default. It also adds features like -Cross DC Synchronization or Permission management. \ No newline at end of file +Cross DC Synchronization or Permission management. 
diff --git a/docs/features/multi_dc_support.md b/docs/features/multi_dc_support.md index 54f5439ca..2d9e05e43 100644 --- a/docs/features/multi_dc_support.md +++ b/docs/features/multi_dc_support.md @@ -47,7 +47,7 @@ The state is available in `LocalClusterStateChanges#latestServiceState`. Then build a `RemoteServices` class providing: -* [AsyncControlPlaneClient](https://github.com/allegro/envoy-control/blob/master/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/AsyncRestTemplateControlPlaneClient.kt) - an HTTP client +* [AsyncControlPlaneClient](https://github.com/allegro/envoy-control/blob/master/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/RestTemplateControlPlaneClient.kt) - an HTTP client * [ControlPlaneInstanceFetcher](https://github.com/allegro/envoy-control/blob/master/envoy-control-source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/synchronization/SimpleConsulInstanceFetcher.kt) - the strategy of retrieving other Envoy Control from given cluster * `remoteClusters` - list of remote clusters diff --git a/docs/integrations/consul.md b/docs/integrations/consul.md index bb648fbf0..4352293fe 100644 --- a/docs/integrations/consul.md +++ b/docs/integrations/consul.md @@ -7,7 +7,7 @@ first-class integration with Consul. Popular Service Mesh solutions provide integration with Consul by polling periodically the state of all services. Assuming we polled the state each second in order to minimize change propagation latency, we would have to send a request -for a [list of services](https://www.consul.io/api/catalog.html#list-services) and then a +for a [list of services](https://www.consul.io/api/catalog#list-services) and then a [request per each service](https://www.consul.io/api/catalog.html#list-nodes-for-service). With 1,000 services, this would generate 1,000 rps per one instance of Control Plane. 
diff --git a/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadata.kt b/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadata.kt index 8be9ed910..c2fd3621d 100644 --- a/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadata.kt +++ b/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadata.kt @@ -6,6 +6,8 @@ import com.google.protobuf.Value import com.google.protobuf.util.Durations import io.envoyproxy.controlplane.server.exception.RequestException import io.grpc.Status +import pl.allegro.tech.servicemesh.envoycontrol.snapshot.CircuitBreakerProperties +import pl.allegro.tech.servicemesh.envoycontrol.snapshot.EgressProperties import pl.allegro.tech.servicemesh.envoycontrol.snapshot.SnapshotProperties import pl.allegro.tech.servicemesh.envoycontrol.snapshot.resource.listeners.util.StatusCodeFilterParser import pl.allegro.tech.servicemesh.envoycontrol.snapshot.resource.listeners.util.StatusCodeFilterSettings @@ -77,20 +79,7 @@ fun Value?.toOutgoing(properties: SnapshotProperties): Outgoing { val allServiceDependenciesIdentifier = properties.outgoingPermissions.allServicesDependencies.identifier val rawDependencies = this?.field("dependencies")?.list().orEmpty().map(::toRawDependency) val allServicesDependencies = toAllServiceDependencies(rawDependencies, allServiceDependenciesIdentifier) - val defaultSettingsFromProperties = DependencySettings( - handleInternalRedirect = properties.egress.handleInternalRedirect, - timeoutPolicy = Outgoing.TimeoutPolicy( - idleTimeout = Durations.fromMillis(properties.egress.commonHttp.idleTimeout.toMillis()), - connectionIdleTimeout = Durations.fromMillis(properties.egress.commonHttp.connectionIdleTimeout.toMillis()), - requestTimeout = Durations.fromMillis(properties.egress.commonHttp.requestTimeout.toMillis()) - ), - retryPolicy = RetryPolicy( - numberRetries = 
properties.retryPolicy.numberOfRetries, - retryHostPredicate = properties.retryPolicy.retryHostPredicate, - hostSelectionRetryMaxAttempts = properties.retryPolicy.hostSelectionRetryMaxAttempts, - retryBackOff = properties.retryPolicy.retryBackOff - ) - ) + val defaultSettingsFromProperties = createDefaultDependencySettingFromEgressProperties(properties.egress) val allServicesDefaultSettings = allServicesDependencies?.value.toSettings(defaultSettingsFromProperties) val services = rawDependencies.filter { it.service != null && it.service != allServiceDependenciesIdentifier } .map { @@ -114,6 +103,47 @@ fun Value?.toOutgoing(properties: SnapshotProperties): Outgoing { ) } +private fun createDefaultDependencySettingFromEgressProperties(egress: EgressProperties) : DependencySettings { + return DependencySettings( + handleInternalRedirect = egress.handleInternalRedirect, + timeoutPolicy = egress.commonHttp.let { + Outgoing.TimeoutPolicy( + idleTimeout = Durations.fromMillis(it.idleTimeout.toMillis()), + connectionIdleTimeout = Durations.fromMillis(it.connectionIdleTimeout.toMillis()), + requestTimeout = Durations.fromMillis(it.requestTimeout.toMillis()) + ) + }, + retryPolicy = egress.retryPolicy.let { RetryPolicy( + numberRetries = it.numberOfRetries, + retryHostPredicate = it.retryHostPredicate, + hostSelectionRetryMaxAttempts = it.hostSelectionRetryMaxAttempts, + retryBackOff = it.retryBackOff + ) }, + circuitBreakers = egress.commonHttp.circuitBreakers.let { properties -> + CircuitBreakers(defaultThreshold = properties.defaultThreshold.toCircuitBreaker(), + highThreshold = properties.highThreshold.toCircuitBreaker()) + } + ) +} + +fun CircuitBreakerProperties.toCircuitBreaker(): CircuitBreaker { + return CircuitBreaker( + priority = this.priority, + maxRequests = this.maxRequests, + maxPendingRequests = this.maxPendingRequests, + maxConnections = this.maxConnections, + maxRetries = this.maxRetries, + maxConnectionPools = this.maxConnectionPools, + trackRemaining = 
this.trackRemaining, + retryBudget = this.retryBudget?.let { + RetryBudget( + budgetPercent = it.budgetPercent, + minRetryConcurrency = it.minRetryConcurrency + ) + } + ) +} + @Suppress("ComplexCondition") private fun toRawDependency(it: Value): RawDependency { val service = it.field("service")?.stringValue @@ -193,11 +223,13 @@ private fun Value?.toSettings(defaultSettings: DependencySettings): DependencySe defaultSettings.retryPolicy ) } + val circuitBreakers = this?.field("circuitBreakers")?.toCircuitBreakers(defaultSettings.circuitBreakers) val shouldAllBeDefault = handleInternalRedirect == null && rewriteHostHeader == null && timeoutPolicy == null && - retryPolicy == null + retryPolicy == null && + circuitBreakers == null return if (shouldAllBeDefault) { defaultSettings @@ -206,11 +238,44 @@ private fun Value?.toSettings(defaultSettings: DependencySettings): DependencySe handleInternalRedirect = handleInternalRedirect ?: defaultSettings.handleInternalRedirect, timeoutPolicy = timeoutPolicy ?: defaultSettings.timeoutPolicy, rewriteHostHeader = rewriteHostHeader ?: defaultSettings.rewriteHostHeader, - retryPolicy = retryPolicy ?: defaultSettings.retryPolicy + retryPolicy = retryPolicy ?: defaultSettings.retryPolicy, + circuitBreakers = circuitBreakers ?: defaultSettings.circuitBreakers ) } } +private fun Value?.toCircuitBreakers(defaultCircuitBreakers: CircuitBreakers): CircuitBreakers { + return CircuitBreakers( + defaultThreshold = this?.field("defaultThreshold")?.toCircuitBreaker(defaultCircuitBreakers.defaultThreshold) + ?: defaultCircuitBreakers.defaultThreshold, + highThreshold = this?.field("highThreshold")?.toCircuitBreaker(defaultCircuitBreakers.highThreshold) + ?: defaultCircuitBreakers.highThreshold + ) +} + +private fun Value?.toCircuitBreaker(defaultCircuitBreaker: CircuitBreaker?): CircuitBreaker { + return CircuitBreaker(priority = this?.field("priority")?.stringValue?.let { RoutingPriority.fromString(it) } + ?: defaultCircuitBreaker?.priority, + 
maxRequests = this?.field("maxRequests")?.numberValue?.toInt() ?: defaultCircuitBreaker?.maxRequests, + maxPendingRequests = this?.field("maxPendingRequests")?.numberValue?.toInt() + ?: defaultCircuitBreaker?.maxPendingRequests, + maxConnections = this?.field("maxConnections")?.numberValue?.toInt() ?: defaultCircuitBreaker?.maxConnections, + maxRetries = this?.field("maxRetries")?.numberValue?.toInt() ?: defaultCircuitBreaker?.maxRetries, + maxConnectionPools = this?.field("maxConnectionPools")?.numberValue?.toInt() + ?: defaultCircuitBreaker?.maxConnectionPools, + trackRemaining = this?.field("trackRemaining")?.boolValue ?: defaultCircuitBreaker?.trackRemaining, + retryBudget = this?.field("retryBudget")?.toRetryBudget(defaultCircuitBreaker?.retryBudget) + ?: defaultCircuitBreaker?.retryBudget + ) +} +private fun Value?.toRetryBudget(defaultRetryBudget: RetryBudget?): RetryBudget { + return RetryBudget( + budgetPercent = this?.field("budgetPercent")?.numberValue ?: defaultRetryBudget?.budgetPercent, + minRetryConcurrency = this?.field("minRetryConcurrency")?.numberValue?.toInt() + ?: defaultRetryBudget?.minRetryConcurrency + ) +} + private fun mapProtoToRetryPolicy(value: Value, defaultRetryPolicy: RetryPolicy): RetryPolicy { return RetryPolicy( retryOn = value.field("retryOn")?.listValue?.valuesList?.map { it.stringValue }, @@ -537,9 +602,42 @@ data class DependencySettings( val handleInternalRedirect: Boolean = false, val timeoutPolicy: Outgoing.TimeoutPolicy = Outgoing.TimeoutPolicy(), val rewriteHostHeader: Boolean = false, - val retryPolicy: RetryPolicy = RetryPolicy() + val retryPolicy: RetryPolicy = RetryPolicy(), + val circuitBreakers: CircuitBreakers = CircuitBreakers() +) + +data class CircuitBreakers( + val defaultThreshold: CircuitBreaker? = null, + val highThreshold: CircuitBreaker? = null +) + +data class CircuitBreaker( + val priority: RoutingPriority? = null, + val maxRequests: Int? = null, + val maxPendingRequests: Int? 
= null, + val maxConnections: Int? = null, + val maxRetries: Int? = null, + val maxConnectionPools: Int? = null, + val trackRemaining: Boolean? = null, + val retryBudget: RetryBudget? = null ) +data class RetryBudget(val budgetPercent: Double? = null, val minRetryConcurrency: Int? = null) + +enum class RoutingPriority { + DEFAULT, HIGH, UNRECOGNIZED; + + companion object { + fun fromString(value: String): RoutingPriority { + return when (value.toUpperCase()) { + "DEFAULT" -> DEFAULT + "HIGH" -> HIGH + else -> UNRECOGNIZED + } + } + } +} + data class RetryPolicy( val retryOn: List? = null, val hostSelectionRetryMaxAttempts: Long? = null, diff --git a/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotProperties.kt b/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotProperties.kt index 2a55d3717..7338037cf 100644 --- a/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotProperties.kt +++ b/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotProperties.kt @@ -9,6 +9,7 @@ import pl.allegro.tech.servicemesh.envoycontrol.groups.OAuth import pl.allegro.tech.servicemesh.envoycontrol.groups.PathMatchingType import pl.allegro.tech.servicemesh.envoycontrol.groups.RetryBackOff import pl.allegro.tech.servicemesh.envoycontrol.groups.RetryHostPredicate +import pl.allegro.tech.servicemesh.envoycontrol.groups.RoutingPriority import java.net.URI import java.time.Duration @@ -35,7 +36,6 @@ class SnapshotProperties { var jwt = JwtFilterProperties() var requireServiceName = false var rateLimit = RateLimitProperties() - var retryPolicy = RetryPolicyProperties() } class MetricsProperties { @@ -251,6 +251,7 @@ class EgressProperties { var hostHeaderRewriting = HostHeaderRewritingProperties() var headersToRemove = mutableListOf() var domains = mutableListOf() + var retryPolicy = RetryPolicyProperties() } class 
IngressProperties { @@ -265,19 +266,27 @@ class CommonHttpProperties { var idleTimeout: Duration = Duration.ofSeconds(120) var connectionIdleTimeout: Duration = Duration.ofSeconds(120) var requestTimeout: Duration = Duration.ofSeconds(120) - var circuitBreakers: CircuitBreakers = CircuitBreakers() + var circuitBreakers: CircuitBreakersProperties = CircuitBreakersProperties() +} + +class CircuitBreakersProperties { + var highThreshold = CircuitBreakerProperties(RoutingPriority.HIGH) + var defaultThreshold = CircuitBreakerProperties(RoutingPriority.DEFAULT) } -class CircuitBreakers { - var highThreshold = Threshold("HIGH") - var defaultThreshold = Threshold("DEFAULT") +class CircuitBreakerProperties(var priority: RoutingPriority = RoutingPriority.DEFAULT) { + var maxRequests: Int = 1024 + var maxPendingRequests: Int = 1024 + var maxConnections: Int = 1024 + var maxRetries: Int = 3 + var maxConnectionPools: Int? = null + var trackRemaining: Boolean = false + var retryBudget: RetryBudgetProperties? 
= RetryBudgetProperties() } -class Threshold(var priority: String) { - var maxConnections = 1024 - var maxPendingRequests = 1024 - var maxRequests = 1024 - var maxRetries = 3 +class RetryBudgetProperties { + var budgetPercent: Double = 20.0 + var minRetryConcurrency: Int = 3 } class Http2Properties { diff --git a/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/resource/clusters/EnvoyClustersFactory.kt b/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/resource/clusters/EnvoyClustersFactory.kt index 64bd68b55..b5660fe0e 100644 --- a/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/resource/clusters/EnvoyClustersFactory.kt +++ b/envoy-control-core/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/resource/clusters/EnvoyClustersFactory.kt @@ -17,7 +17,6 @@ import io.envoyproxy.envoy.config.core.v3.DataSource import io.envoyproxy.envoy.config.core.v3.GrpcService import io.envoyproxy.envoy.config.core.v3.Http2ProtocolOptions import io.envoyproxy.envoy.config.core.v3.HttpProtocolOptions -import io.envoyproxy.envoy.config.core.v3.RoutingPriority import io.envoyproxy.envoy.config.core.v3.SocketAddress import io.envoyproxy.envoy.config.core.v3.TransportSocket import io.envoyproxy.envoy.config.core.v3.UpstreamHttpProtocolOptions @@ -32,13 +31,16 @@ import io.envoyproxy.envoy.extensions.transport_sockets.tls.v3.CommonTlsContext import io.envoyproxy.envoy.extensions.transport_sockets.tls.v3.SdsSecretConfig import io.envoyproxy.envoy.extensions.transport_sockets.tls.v3.TlsParameters import io.envoyproxy.envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext +import io.envoyproxy.envoy.type.v3.Percent import pl.allegro.tech.servicemesh.envoycontrol.groups.AllServicesGroup +import pl.allegro.tech.servicemesh.envoycontrol.groups.CircuitBreaker import pl.allegro.tech.servicemesh.envoycontrol.groups.CommunicationMode import 
pl.allegro.tech.servicemesh.envoycontrol.groups.CommunicationMode.ADS import pl.allegro.tech.servicemesh.envoycontrol.groups.CommunicationMode.XDS import pl.allegro.tech.servicemesh.envoycontrol.groups.DependencySettings import pl.allegro.tech.servicemesh.envoycontrol.groups.DomainDependency import pl.allegro.tech.servicemesh.envoycontrol.groups.Group +import pl.allegro.tech.servicemesh.envoycontrol.groups.RoutingPriority import pl.allegro.tech.servicemesh.envoycontrol.groups.ServicesGroup import pl.allegro.tech.servicemesh.envoycontrol.groups.containsGlobalRateLimits import pl.allegro.tech.servicemesh.envoycontrol.logger @@ -46,7 +48,6 @@ import pl.allegro.tech.servicemesh.envoycontrol.snapshot.ClusterConfiguration import pl.allegro.tech.servicemesh.envoycontrol.snapshot.GlobalSnapshot import pl.allegro.tech.servicemesh.envoycontrol.snapshot.OAuthProvider import pl.allegro.tech.servicemesh.envoycontrol.snapshot.SnapshotProperties -import pl.allegro.tech.servicemesh.envoycontrol.snapshot.Threshold import pl.allegro.tech.servicemesh.envoycontrol.snapshot.resource.listeners.filters.SanUriMatcherFactory class EnvoyClustersFactory( @@ -57,8 +58,6 @@ class EnvoyClustersFactory( ).build() private val dynamicForwardProxyCluster: Cluster = createDynamicForwardProxyCluster() - private val thresholds: List = mapPropertiesToThresholds() - private val allThresholds = CircuitBreakers.newBuilder().addAllThresholds(thresholds).build() private val tlsProperties = properties.incomingPermissions.tlsAuthentication private val sanUriMatcher = SanUriMatcherFactory(tlsProperties) private val matchPlaintextContext = Cluster.TransportSocketMatch.newBuilder() @@ -170,7 +169,8 @@ class EnvoyClustersFactory( return listOf(Cluster.newBuilder(cluster).build()) } - logger.warn("ratelimit service [{}] cluster required for service [{}] has not been found.", + logger.warn( + "ratelimit service [{}] cluster required for service [{}] has not been found.", properties.rateLimit.serviceName, 
group.serviceName ) @@ -193,12 +193,13 @@ class EnvoyClustersFactory( createClusterForGroup(it.value.settings, clusters[it.key]) } is AllServicesGroup -> { + val defaultServiceSettings = group.proxySettings.outgoing.defaultServiceSettings globalSnapshot.allServicesNames.mapNotNull { val dependency = serviceDependencies[it] - if (dependency != null && dependency.settings.timeoutPolicy.connectionIdleTimeout != null) { - createClusterForGroup(dependency.settings, clusters[it]) + if (dependency != null) { + createClusterForGroup(dependency.settings, clusters[it], defaultServiceSettings) } else { - createClusterForGroup(group.proxySettings.outgoing.defaultServiceSettings, clusters[it]) + createClusterForGroup(defaultServiceSettings, clusters[it]) } } } @@ -210,17 +211,61 @@ class EnvoyClustersFactory( return clustersForGroup } - private fun createClusterForGroup(dependencySettings: DependencySettings, cluster: Cluster?): Cluster? { + private fun createClusterForGroup( + dependencySettings: DependencySettings, + cluster: Cluster?, + defaultDependencySettings: DependencySettings? = null + ): Cluster? { return cluster?.let { val idleTimeoutPolicy = - dependencySettings.timeoutPolicy.connectionIdleTimeout ?: cluster.commonHttpProtocolOptions.idleTimeout + dependencySettings.timeoutPolicy.connectionIdleTimeout + ?: defaultDependencySettings?.timeoutPolicy?.connectionIdleTimeout + ?: cluster.commonHttpProtocolOptions.idleTimeout Cluster.newBuilder(cluster) + .setCircuitBreakers(createCircuitBreakers(dependencySettings, defaultDependencySettings)) .setCommonHttpProtocolOptions( HttpProtocolOptions.newBuilder().setIdleTimeout(idleTimeoutPolicy) ).build() } } + private fun createCircuitBreakers( + dependencySettings: DependencySettings, + defaultDependencySettings: DependencySettings? 
= null + ): CircuitBreakers { + val defaultThreshold = dependencySettings.circuitBreakers.defaultThreshold + ?: defaultDependencySettings?.circuitBreakers?.defaultThreshold + val highThreshold = dependencySettings.circuitBreakers.highThreshold + ?: defaultDependencySettings?.circuitBreakers?.highThreshold + val thresholds = listOfNotNull( + defaultThreshold?.toThreshold(RoutingPriority.DEFAULT), + highThreshold?.toThreshold(RoutingPriority.HIGH) + ) + return CircuitBreakers.newBuilder() + .addAllThresholds(thresholds) + .build() + } + + private fun CircuitBreaker.toThreshold(priority: RoutingPriority): CircuitBreakers.Thresholds { + val builder = CircuitBreakers.Thresholds.newBuilder() + priority.convertPriority().let(builder::setPriority) + maxRequests?.toValue()?.let(builder::setMaxRequests) + maxConnections?.toValue()?.let(builder::setMaxConnections) + maxRetries?.toValue()?.let(builder::setMaxRetries) + maxConnectionPools?.toValue()?.let(builder::setMaxConnectionPools) + maxPendingRequests?.toValue()?.let(builder::setMaxPendingRequests) + trackRemaining?.let(builder::setTrackRemaining) + retryBudget?.let { + val retryBudgetBuilder = CircuitBreakers.Thresholds.RetryBudget.newBuilder() + it.minRetryConcurrency?.toValue()?.let(retryBudgetBuilder::setMinRetryConcurrency) + it.budgetPercent?.let { Percent.newBuilder().setValue(it) }?.let(retryBudgetBuilder::setBudgetPercent) + builder.setRetryBudget(retryBudgetBuilder) + } + return builder.build() + } + + private fun Int.toValue() = this.let { UInt32Value.of(this) } + private fun shouldAddDynamicForwardProxyCluster(group: Group) = group.proxySettings.outgoing.getDomainPatternDependencies().isNotEmpty() @@ -341,6 +386,8 @@ class EnvoyClustersFactory( clusterBuilder.setCommonHttpProtocolOptions(HttpProtocolOptions.newBuilder().setIdleTimeout(it)) } + clusterBuilder.circuitBreakers = createCircuitBreakers(domainDependency.settings) + return clusterBuilder.build() } @@ -395,7 +442,6 @@ class EnvoyClustersFactory( 
.configureLbSubsets() cluster.setCommonHttpProtocolOptions(httpProtocolOptions) - cluster.setCircuitBreakers(allThresholds) if (clusterConfiguration.http2Enabled) { cluster.setHttp2ProtocolOptions(Http2ProtocolOptions.getDefaultInstance()) @@ -469,25 +515,10 @@ class EnvoyClustersFactory( } ) - private fun mapPropertiesToThresholds(): List { - return listOf( - convertThreshold(properties.egress.commonHttp.circuitBreakers.defaultThreshold), - convertThreshold(properties.egress.commonHttp.circuitBreakers.highThreshold) - ) - } - - private fun convertThreshold(threshold: Threshold): CircuitBreakers.Thresholds { - val thresholdsBuilder = CircuitBreakers.Thresholds.newBuilder() - thresholdsBuilder.maxConnections = UInt32Value.of(threshold.maxConnections) - thresholdsBuilder.maxPendingRequests = UInt32Value.of(threshold.maxPendingRequests) - thresholdsBuilder.maxRequests = UInt32Value.of(threshold.maxRequests) - thresholdsBuilder.maxRetries = UInt32Value.of(threshold.maxRetries) - when (threshold.priority.toUpperCase()) { - "DEFAULT" -> thresholdsBuilder.priority = RoutingPriority.DEFAULT - "HIGH" -> thresholdsBuilder.priority = RoutingPriority.HIGH - else -> thresholdsBuilder.priority = RoutingPriority.UNRECOGNIZED - } - return thresholdsBuilder.build() + private fun RoutingPriority.convertPriority() = when (this) { + RoutingPriority.DEFAULT -> io.envoyproxy.envoy.config.core.v3.RoutingPriority.DEFAULT + RoutingPriority.HIGH -> io.envoyproxy.envoy.config.core.v3.RoutingPriority.HIGH + else -> io.envoyproxy.envoy.config.core.v3.RoutingPriority.UNRECOGNIZED } private fun configureOutlierDetection(clusterBuilder: Cluster.Builder) { diff --git a/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataTest.kt b/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataTest.kt index 297d99cb7..4712ca2a9 100644 --- 
a/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataTest.kt +++ b/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataTest.kt @@ -950,6 +950,56 @@ class NodeMetadataTest { assertThat(duration!!.seconds).isEqualTo(20L) } + @Test + fun `should parse dependencies with circuit breakers`() { + // given + val proto = outgoingDependenciesProto { + withService( + "service-1", + circuitBreakers = OutgoingDependenciesProtoScope.CircuitBreakers( + defaultThreshold = OutgoingDependenciesProtoScope.CircuitBreaker( + maxRetries = 1, + maxPendingRequests = 2, + maxRequests = 3, + maxConnections = 4, + maxConnectionPools = 6, + trackRemaining = false, + budgetPercent = 10.0, + minRetryConcurrency = 7 + ) + ) + ) + withService("service-2") + } + + // when + val outgoing = proto.toOutgoing(snapshotProperties()) + + // then + val expectedCircuitBreaker1 = CircuitBreaker( + priority = RoutingPriority.DEFAULT, + maxRetries = 1, + maxPendingRequests = 2, + maxRequests = 3, + maxConnections = 4, + maxConnectionPools = 6, + trackRemaining = false, + retryBudget = RetryBudget( + budgetPercent = 10.0, + minRetryConcurrency = 7 + ) + ) + val defaultCircuitBreaker = snapshotProperties().egress.commonHttp.circuitBreakers.defaultThreshold.toCircuitBreaker() + val highCircuitBreaker = snapshotProperties().egress.commonHttp.circuitBreakers.highThreshold.toCircuitBreaker() + outgoing.getServiceDependencies().assertServiceDependency("service-1") + .hasDefaultThresholdCircuitBreaker(expectedCircuitBreaker1) + .hasHighThresholdCircuitBreaker(highCircuitBreaker) + + outgoing.getServiceDependencies().assertServiceDependency("service-2") + .hasDefaultThresholdCircuitBreaker(defaultCircuitBreaker) + .hasHighThresholdCircuitBreaker(highCircuitBreaker) + } + @Test fun `should return null when empty value provided`() { // given @@ -1020,6 +1070,20 @@ class NodeMetadataTest { return this } + fun 
ObjectAssert.hasDefaultThresholdCircuitBreaker( + circuitBreaker: CircuitBreaker + ): ObjectAssert { + this.extracting { it.circuitBreakers.defaultThreshold }.isEqualTo(circuitBreaker) + return this + } + + fun ObjectAssert.hasHighThresholdCircuitBreaker( + circuitBreaker: CircuitBreaker + ): ObjectAssert { + this.extracting { it.circuitBreakers.highThreshold }.isEqualTo(circuitBreaker) + return this + } + fun List.assertServiceDependency(name: String): ObjectAssert { val list = this.filter { it.service == name } assertThat(list).hasSize(1) diff --git a/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/TestNodeFactory.kt b/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/TestNodeFactory.kt index 9a44e551f..60b256c5a 100644 --- a/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/TestNodeFactory.kt +++ b/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/TestNodeFactory.kt @@ -77,6 +77,28 @@ fun ProxySettings.with( domainDependencies: Set = emptySet(), allServicesDependencies: Boolean = false, defaultServiceSettings: DependencySettings = DependencySettings( + circuitBreakers = CircuitBreakers( + defaultThreshold = CircuitBreaker( + RoutingPriority.DEFAULT, + maxRequests = 1024, + maxPendingRequests = 1024, + maxConnections = 1024, + maxRetries = 3, + maxConnectionPools = null, + trackRemaining = false, + retryBudget = RetryBudget(20.0, 3) + ), + highThreshold = CircuitBreaker( + RoutingPriority.HIGH, + maxRequests = 1024, + maxPendingRequests = 1024, + maxConnections = 1024, + maxRetries = 3, + maxConnectionPools = null, + trackRemaining = false, + retryBudget = RetryBudget(20.0, 3) + ) + ), timeoutPolicy = Outgoing.TimeoutPolicy( Durations.fromSeconds(120), Durations.fromSeconds(120), @@ -191,7 +213,23 @@ class OutgoingDependenciesProtoScope { val connectionIdleTimeout: String? = null, val requestTimeout: String? 
= null, val handleInternalRedirect: Boolean? = null, - val retryPolicy: RetryPolicyInput? = null + val retryPolicy: RetryPolicyInput? = null, + val circuitBreakers: CircuitBreakers? = null + ) + + data class CircuitBreakers( + val defaultThreshold: CircuitBreaker? = null, + val highThreshold: CircuitBreaker? = null + ) + data class CircuitBreaker( + val maxRequests: Int? = null, + val maxPendingRequests: Int? = null, + val maxConnections: Int? = null, + val maxRetries: Int? = null, + val maxConnectionPools: Int? = null, + val trackRemaining: Boolean? = null, + val budgetPercent: Double? = null, + val minRetryConcurrency: Int? = null ) val dependencies = mutableListOf() @@ -199,8 +237,9 @@ class OutgoingDependenciesProtoScope { fun withServices( serviceDependencies: List = emptyList(), idleTimeout: String? = null, - responseTimeout: String? = null - ) = serviceDependencies.forEach { withService(it, idleTimeout, responseTimeout) } + responseTimeout: String? = null, + circuitBreakers: CircuitBreakers? = null + ) = serviceDependencies.forEach { withService(it, idleTimeout, responseTimeout, circuitBreakers = circuitBreakers) } fun withService( serviceName: String, @@ -208,7 +247,8 @@ class OutgoingDependenciesProtoScope { connectionIdleTimeout: String? = null, requestTimeout: String? = null, handleInternalRedirect: Boolean? = null, - retryPolicy: RetryPolicyInput? = null + retryPolicy: RetryPolicyInput? = null, + circuitBreakers: CircuitBreakers? = null ) = dependencies.add( Dependency( service = serviceName, @@ -216,7 +256,8 @@ class OutgoingDependenciesProtoScope { connectionIdleTimeout = connectionIdleTimeout, requestTimeout = requestTimeout, handleInternalRedirect = handleInternalRedirect, - retryPolicy = retryPolicy + retryPolicy = retryPolicy, + circuitBreakers = circuitBreakers ) ) @@ -224,13 +265,15 @@ class OutgoingDependenciesProtoScope { url: String, idleTimeout: String? = null, connectionIdleTimeout: String? = null, - requestTimeout: String? 
= null + requestTimeout: String? = null, + circuitBreakers: CircuitBreakers? = null ) = dependencies.add( Dependency( domain = url, idleTimeout = idleTimeout, connectionIdleTimeout = connectionIdleTimeout, - requestTimeout = requestTimeout + requestTimeout = requestTimeout, + circuitBreakers = circuitBreakers ) ) @@ -238,13 +281,15 @@ class OutgoingDependenciesProtoScope { pattern: String, idleTimeout: String? = null, connectionIdleTimeout: String? = null, - requestTimeout: String? = null + requestTimeout: String? = null, + circuitBreakers: CircuitBreakers? = null ) = dependencies.add( Dependency( domainPattern = pattern, idleTimeout = idleTimeout, connectionIdleTimeout = connectionIdleTimeout, - requestTimeout = requestTimeout + requestTimeout = requestTimeout, + circuitBreakers = circuitBreakers ) ) @@ -272,7 +317,8 @@ fun outgoingDependenciesProto( connectionIdleTimeout = it.connectionIdleTimeout, requestTimeout = it.requestTimeout, handleInternalRedirect = it.handleInternalRedirect, - retryPolicy = it.retryPolicy + retryPolicy = it.retryPolicy, + circuitBreakers = it.circuitBreakers ) ) } @@ -288,18 +334,63 @@ fun outgoingDependencyProto( idleTimeout: String? = null, connectionIdleTimeout: String? = null, requestTimeout: String? = null, - retryPolicy: RetryPolicyInput? = null + retryPolicy: RetryPolicyInput? = null, + circuitBreakers: OutgoingDependenciesProtoScope.CircuitBreakers? 
= null ) = struct { service?.also { putFields("service", string(service)) } domain?.also { putFields("domain", string(domain)) } retryPolicy?.also { putFields("retryPolicy", retryPolicyProto(retryPolicy)) } domainPattern?.also { putFields("domainPattern", string(domainPattern)) } handleInternalRedirect?.also { putFields("handleInternalRedirect", boolean(handleInternalRedirect)) } + circuitBreakers?.also { putFields("circuitBreakers", circuitBreakersProto(it)) } + if (idleTimeout != null || requestTimeout != null || connectionIdleTimeout != null) { putFields("timeoutPolicy", outgoingTimeoutPolicy(idleTimeout, connectionIdleTimeout, requestTimeout)) } } +fun circuitBreakersProto(circuitBreakers: OutgoingDependenciesProtoScope.CircuitBreakers) = struct { + circuitBreakers.defaultThreshold?.let { + putFields("defaultThreshold", circuitBreakerProto(it, RoutingPriority.DEFAULT)) + } + + circuitBreakers.highThreshold?.let { + putFields("highThreshold", circuitBreakerProto(it, RoutingPriority.HIGH)) + } +} + +private fun circuitBreakerProto(circuitBreaker: OutgoingDependenciesProtoScope.CircuitBreaker, priority: RoutingPriority) = struct { + putFields("priority", string(priority.name)) + + circuitBreaker.maxConnectionPools?.also { + putFields("maxConnectionPools", integer(it)) + } + + circuitBreaker.maxPendingRequests?.also { + putFields("maxPendingRequests", integer(it)) + } + circuitBreaker.maxRetries?.also { + putFields("maxRetries", integer(it)) + } + circuitBreaker.maxConnections?.also { + putFields("maxConnections", integer(it)) + } + circuitBreaker.maxRequests?.also { + putFields("maxRequests", integer(it)) + } + + circuitBreaker.trackRemaining?.also { + putFields("trackRemaining", boolean(it)) + } + putFields("retryBudget", struct { + circuitBreaker.budgetPercent?.also { + putFields("budgetPercent", integer(it)) + } + circuitBreaker.minRetryConcurrency?.also { + putFields("minRetryConcurrency", integer(it)) + } + }) +} private fun retryPolicyProto(retryPolicy: 
RetryPolicyInput) = struct { retryPolicy.retryOn?.also { putFields("retryOn", retryOnProto(it)) } retryPolicy.hostSelectionRetryMaxAttempts?.also { putFields("hostSelectionRetryMaxAttempts", integer(it)) } @@ -431,7 +522,7 @@ private fun boolean(value: Boolean): Value { return Value.newBuilder().setBoolValue(value).build() } -private fun integer(value: Int): Value { +private fun integer(value: Number): Value { return Value.newBuilder().setNumberValue(value.toDouble()).build() } diff --git a/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdaterTest.kt b/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdaterTest.kt index 36f9b70a9..70a643458 100644 --- a/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdaterTest.kt +++ b/envoy-control-core/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdaterTest.kt @@ -20,6 +20,8 @@ import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.MethodSource import pl.allegro.tech.servicemesh.envoycontrol.groups.AccessLogFilterSettings import pl.allegro.tech.servicemesh.envoycontrol.groups.AllServicesGroup +import pl.allegro.tech.servicemesh.envoycontrol.groups.CircuitBreaker +import pl.allegro.tech.servicemesh.envoycontrol.groups.CircuitBreakers import pl.allegro.tech.servicemesh.envoycontrol.groups.CommunicationMode import pl.allegro.tech.servicemesh.envoycontrol.groups.CommunicationMode.ADS import pl.allegro.tech.servicemesh.envoycontrol.groups.CommunicationMode.XDS @@ -30,8 +32,9 @@ import pl.allegro.tech.servicemesh.envoycontrol.groups.ListenersConfig import pl.allegro.tech.servicemesh.envoycontrol.groups.Outgoing import pl.allegro.tech.servicemesh.envoycontrol.groups.ProxySettings import pl.allegro.tech.servicemesh.envoycontrol.groups.RetryBackOff +import pl.allegro.tech.servicemesh.envoycontrol.groups.RetryBudget import 
pl.allegro.tech.servicemesh.envoycontrol.groups.RetryHostPredicate -import pl.allegro.tech.servicemesh.envoycontrol.groups.RetryPolicy as EnvoyControlRetryPolicy +import pl.allegro.tech.servicemesh.envoycontrol.groups.RoutingPriority import pl.allegro.tech.servicemesh.envoycontrol.groups.ServiceDependency import pl.allegro.tech.servicemesh.envoycontrol.groups.ServicesGroup import pl.allegro.tech.servicemesh.envoycontrol.groups.with @@ -60,6 +63,7 @@ import java.util.concurrent.CountDownLatch import java.util.concurrent.Executors import java.util.concurrent.TimeUnit import java.util.function.Consumer +import pl.allegro.tech.servicemesh.envoycontrol.groups.RetryPolicy as EnvoyControlRetryPolicy @Suppress("LargeClass") class SnapshotUpdaterTest { @@ -1224,6 +1228,26 @@ fun serviceDependencies(vararg serviceNames: String): Set = service = it, settings = DependencySettings( timeoutPolicy = outgoingTimeoutPolicy(), + circuitBreakers = CircuitBreakers( + defaultThreshold = CircuitBreaker( + RoutingPriority.DEFAULT, maxRequests = 1024, + maxPendingRequests = 1024, + maxConnections = 1024, + maxRetries = 3, + maxConnectionPools = null, + trackRemaining = false, + retryBudget = RetryBudget(20.0, 3) + ), + highThreshold = CircuitBreaker( + RoutingPriority.HIGH, maxRequests = 1024, + maxPendingRequests = 1024, + maxConnections = 1024, + maxRetries = 3, + maxConnectionPools = null, + trackRemaining = false, + retryBudget = RetryBudget(20.0, 3) + ) + ), retryPolicy = pl.allegro.tech.servicemesh.envoycontrol.groups.RetryPolicy( hostSelectionRetryMaxAttempts = 3, retryHostPredicate = listOf(RetryHostPredicate("envoy.retry_host_predicates.previous_hosts")), diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ClusterCircuitBreakerDefaultSettingsTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ClusterCircuitBreakerDefaultSettingsTest.kt index 35e76a3ce..a96968d7b 100644 --- 
a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ClusterCircuitBreakerDefaultSettingsTest.kt +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ClusterCircuitBreakerDefaultSettingsTest.kt @@ -6,30 +6,60 @@ import org.junit.jupiter.api.extension.RegisterExtension import pl.allegro.tech.servicemesh.envoycontrol.assertions.isFrom import pl.allegro.tech.servicemesh.envoycontrol.assertions.isOk import pl.allegro.tech.servicemesh.envoycontrol.assertions.untilAsserted +import pl.allegro.tech.servicemesh.envoycontrol.config.Xds import pl.allegro.tech.servicemesh.envoycontrol.config.consul.ConsulExtension import pl.allegro.tech.servicemesh.envoycontrol.config.envoy.EnvoyExtension import pl.allegro.tech.servicemesh.envoycontrol.config.envoycontrol.EnvoyControlExtension import pl.allegro.tech.servicemesh.envoycontrol.config.service.EchoServiceExtension -import pl.allegro.tech.servicemesh.envoycontrol.snapshot.Threshold +import pl.allegro.tech.servicemesh.envoycontrol.groups.RoutingPriority +import pl.allegro.tech.servicemesh.envoycontrol.snapshot.CircuitBreakerProperties internal class ClusterCircuitBreakerDefaultSettingsTest { companion object { private val properties = mapOf( - "envoy-control.envoy.snapshot.egress.commonHttp.circuitBreakers.defaultThreshold" to Threshold("DEFAULT").also { + "envoy-control.envoy.snapshot.egress.commonHttp.circuitBreakers.defaultThreshold" to CircuitBreakerProperties(RoutingPriority.DEFAULT).also { it.maxConnections = 1 it.maxPendingRequests = 2 it.maxRequests = 3 it.maxRetries = 4 }, - "envoy-control.envoy.snapshot.egress.commonHttp.circuitBreakers.highThreshold" to Threshold("HIGH").also { + "envoy-control.envoy.snapshot.egress.commonHttp.circuitBreakers.highThreshold" to CircuitBreakerProperties(RoutingPriority.HIGH).also { it.maxConnections = 5 it.maxPendingRequests = 6 it.maxRequests = 7 it.maxRetries = 8 + it.retryBudget = null } ) + private val 
CIRCUIT_BREAKERS_SETTINGS_CONFIG = """ +node: + metadata: + proxy_settings: + outgoing: + dependencies: + - service: "echo" + circuitBreakers: + defaultThreshold: + maxRequests: 22 + maxRetries: 10 + retryBudget: + minRetryConcurrency: 6 + - service: "echo2" + circuitBreakers: + defaultThreshold: + maxRequests: 22 + maxRetries: 10 + retryBudget: + minRetryConcurrency: 6 + highThreshold: + maxConnections: 11 + maxPendingRequests: 12 + maxRequests: 22 + maxRetries: 10 + """.trimIndent() + @JvmField @RegisterExtension val consul = ConsulExtension() @@ -45,10 +75,18 @@ internal class ClusterCircuitBreakerDefaultSettingsTest { @JvmField @RegisterExtension val envoy = EnvoyExtension(envoyControl, service) + + @JvmField + @RegisterExtension + val service2 = EchoServiceExtension() + + @JvmField + @RegisterExtension + val envoy2 = EnvoyExtension(envoyControl, service2, Xds.copy(serviceName = "echo2", configOverride = CIRCUIT_BREAKERS_SETTINGS_CONFIG)) } @Test - fun `should enable setting circuit breaker threstholds setting`() { + fun `should set default circuit breaker thresholds setting`() { // given consul.server.operations.registerService(name = "echo", extension = service) untilAsserted { @@ -57,11 +95,38 @@ internal class ClusterCircuitBreakerDefaultSettingsTest { } // when - val maxRequestsSetting = envoy.container.admin().circuitBreakerSetting("echo", "max_requests", "default_priority") - val maxRetriesSetting = envoy.container.admin().circuitBreakerSetting("echo", "max_retries", "high_priority") + val admin = envoy.container.admin() + val maxRequestsSetting = admin.circuitBreakerSetting("echo", "max_requests", "default_priority") + val maxRetriesSetting = admin.circuitBreakerSetting("echo", "max_retries", "high_priority") // then assertThat(maxRequestsSetting).isEqualTo(3) assertThat(maxRetriesSetting).isEqualTo(8) } + + @Test + fun `should set circuit breaker thresholds settings from metadata`() { + // given + consul.server.operations.registerService(name = 
"echo2", extension = service2) + + untilAsserted { + val response = envoy2.egressOperations.callService("echo2") + assertThat(response).isOk().isFrom(service2) + } + + // when + val admin = envoy2.container.admin() + val maxRequestSetting = admin.circuitBreakerSetting("echo2", "max_requests", "high_priority") + val maxConnectionsSetting = admin.circuitBreakerSetting("echo2", "max_connections", "high_priority") + val maxPendingSetting = admin.circuitBreakerSetting("echo2", "max_pending_requests", "high_priority") + val maxRetriesSetting = admin.circuitBreakerSetting("echo2", "max_retries", "high_priority") + val maxRetriesDefaultSetting = admin.circuitBreakerSetting("echo2", "max_retries", "default_priority") + + // then + assertThat(maxRetriesSetting).isEqualTo(10) + assertThat(maxRetriesDefaultSetting).isEqualTo(6) + assertThat(maxConnectionsSetting).isEqualTo(11) + assertThat(maxPendingSetting).isEqualTo(12) + assertThat(maxRequestSetting).isEqualTo(22) + } }