Make the readiness probe non-blocking, and backport non-blocking liveness and readiness probes

Closes #22109

Co-authored-by: Martin Bartoš <mabartos@redhat.com>
(cherry picked from commit 6a78e36d25)
This commit is contained in:
Alexander Schwartz 2023-07-29 13:17:01 +02:00 committed by Alexander Schwartz
parent 1afcccfbc7
commit bc5fdfbd29
6 changed files with 137 additions and 8 deletions

View file

@ -2,10 +2,19 @@ package org.keycloak.config;
/**
 * Declares the configuration options that belong to the {@code HEALTH} option category.
 */
public class HealthOptions {
// Build-time option "health-enabled" (default: false) controlling whether the health check endpoints are exposed.
// NOTE(review): the two consecutive HEALTH_ENABLED declarations below look like the before/after lines of
// this diff (raw Option vs. Option<Boolean>) — only one of them can exist in the real file; confirm.
public static final Option HEALTH_ENABLED = new OptionBuilder<>("health-enabled", Boolean.class)
public static final Option<Boolean> HEALTH_ENABLED = new OptionBuilder<>("health-enabled", Boolean.class)
.category(OptionCategory.HEALTH)
.description("If the server should expose health check endpoints. If enabled, health checks are available at the '/health', '/health/ready' and '/health/live' endpoints.")
.defaultValue(Boolean.FALSE)
.buildTime(true)
.build();
// Hidden build-time option "health-classic-probes-enabled" (default: false) for falling back to the
// original Quarkus blocking health handler.
// NOTE(review): the description mentions only '/health/ready', while the KeycloakProcessor hunk visible in
// this change uses the flag to decide whether the reactive '/health/live' route is registered — confirm
// which endpoints the flag is meant to cover.
public static final Option<Boolean> HEALTH_CLASSIC_PROBES_ENABLED = new OptionBuilder<>("health-classic-probes-enabled", Boolean.class)
.category(OptionCategory.HEALTH)
.description("If enabled, use the original Quarkus blocking handler for '/health/ready'")
.defaultValue(Boolean.FALSE)
.buildTime(true)
.hidden()
.build();
}

View file

@ -75,7 +75,6 @@ import org.keycloak.connections.jpa.JpaConnectionProvider;
import org.keycloak.connections.jpa.JpaConnectionSpi;
import org.keycloak.connections.jpa.updater.liquibase.LiquibaseJpaUpdaterProviderFactory;
import org.keycloak.connections.jpa.updater.liquibase.conn.DefaultLiquibaseConnectionProvider;
import org.keycloak.models.map.storage.jpa.EventListenerIntegrator;
import org.keycloak.models.map.storage.jpa.JpaMapStorageProviderFactory;
import org.keycloak.policy.BlacklistPasswordPolicyProviderFactory;
import org.keycloak.protocol.ProtocolMapperSpi;
@ -86,6 +85,7 @@ import org.keycloak.provider.Provider;
import org.keycloak.provider.ProviderFactory;
import org.keycloak.provider.ProviderManager;
import org.keycloak.provider.Spi;
import org.keycloak.quarkus.runtime.integration.health.ReactiveLivenessHandler;
import org.keycloak.quarkus.runtime.Environment;
import org.keycloak.quarkus.runtime.KeycloakRecorder;
import org.keycloak.quarkus.runtime.configuration.Configuration;
@ -147,7 +147,6 @@ import static org.keycloak.quarkus.runtime.Environment.getProviderFiles;
import static org.keycloak.quarkus.runtime.KeycloakRecorder.DEFAULT_HEALTH_ENDPOINT;
import static org.keycloak.quarkus.runtime.KeycloakRecorder.DEFAULT_METRICS_ENDPOINT;
import static org.keycloak.quarkus.runtime.Providers.getProviderManager;
import static org.keycloak.quarkus.runtime.configuration.Configuration.getKcConfigValue;
import static org.keycloak.quarkus.runtime.configuration.Configuration.getOptionalKcValue;
import static org.keycloak.quarkus.runtime.configuration.Configuration.getOptionalValue;
import static org.keycloak.quarkus.runtime.configuration.Configuration.getPropertyNames;
@ -633,6 +632,11 @@ class KeycloakProcessor {
if (healthDisabled) {
routes.produce(RouteBuildItem.builder().route(DEFAULT_HEALTH_ENDPOINT.concat("/*")).handler(new NotFoundHandler()).build());
} else {
// local solution until https://github.com/quarkusio/quarkus/issues/35099 is available in Quarkus
if (!isHealthClassicProbesEnabled()) {
routes.produce(RouteBuildItem.builder().route(DEFAULT_HEALTH_ENDPOINT.concat("/live")).handler(new ReactiveLivenessHandler()).build());
}
}
boolean metricsDisabled = !isMetricsEnabled();
@ -876,6 +880,10 @@ class KeycloakProcessor {
return Configuration.getOptionalBooleanValue(NS_KEYCLOAK_PREFIX.concat("health-enabled")).orElse(false);
}
/**
 * Reads the {@code health-classic-probes-enabled} setting under the Keycloak namespace prefix.
 *
 * @return the configured value, or {@code false} when the option is not set
 */
private boolean isHealthClassicProbesEnabled() {
    final String classicProbesKey = NS_KEYCLOAK_PREFIX.concat("health-classic-probes-enabled");
    return Configuration.getOptionalBooleanValue(classicProbesKey).orElse(false);
}
static JdbcDataSourceBuildItem getDefaultDataSource(List<JdbcDataSourceBuildItem> jdbcDataSources) {
for (JdbcDataSourceBuildItem jdbcDataSource : jdbcDataSources) {
if (jdbcDataSource.isDefault()) {

View file

@ -14,6 +14,8 @@ final class HealthPropertyMappers {
fromOption(HealthOptions.HEALTH_ENABLED)
.to("quarkus.health.extensions.enabled")
.paramLabel(Boolean.TRUE + "|" + Boolean.FALSE)
.build(),
fromOption(HealthOptions.HEALTH_CLASSIC_PROBES_ENABLED)
.build()
};
}

View file

@ -0,0 +1,57 @@
/*
* Copyright 2023 Red Hat, Inc. and/or its affiliates
* and other contributors as indicated by the @author tags.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.keycloak.quarkus.runtime.integration.health;
import io.quarkus.smallrye.health.runtime.SmallRyeLivenessHandler;
import io.smallrye.health.SmallRyeHealth;
import io.smallrye.health.SmallRyeHealthReporter;
import io.smallrye.mutiny.Uni;
import io.vertx.core.Handler;
import io.vertx.ext.web.RoutingContext;
/**
* This adds the possibility to have a non-blocking health handler in Quarkus.
* <p>
* Without a non-blocking health check, all liveness and readiness probes will enqueue in the worker thread pool. Under high load
* or if there is a lot of blocking IO happening (for example, during Keycloak cluster rebalancing), this leads to probes being queued.
* Queued probes would lead to timeouts unless the timeouts are configured to 10-20 seconds. Reactive probes avoid the enqueueing
* in the worker thread pool for all non-blocking probes, which will be the default for the (otherwise empty) liveness probe.
* For the readiness probe, this depends on the implementation of the specific readiness probes.
* <p>
* This is a workaround until <a href="https://github.com/quarkusio/quarkus/pull/35100">quarkusio/quarkus#35100</a> is available
* in a regular Quarkus version. Then these classes can be removed.
*
* @author Alexander Schwartz
*/
public abstract class ReactiveHealthHandler implements Handler<RoutingContext> {

    /**
     * Subscribes to the asynchronously computed health status and writes the HTTP response once it is available.
     * <p>
     * The response itself is rendered by an anonymous {@link SmallRyeLivenessHandler} whose
     * {@code getHealth} is overridden to return the already-resolved status instead of computing it again.
     * <p>
     * If the health {@link Uni} fails, the failure is passed to {@link RoutingContext#fail(Throwable)} so that
     * the request is terminated with an error. Without a failure callback nothing in this handler would end
     * the response, and the probe would stay open until the caller times out.
     *
     * @param context the routing context of the incoming probe request
     */
    @Override
    public void handle(RoutingContext context) {
        getHealth().subscribe().with(
                smallRyeHealth -> {
                    new SmallRyeLivenessHandler() {
                        @Override
                        protected SmallRyeHealth getHealth(SmallRyeHealthReporter reporter, RoutingContext ctx) {
                            // The status was already resolved reactively; hand it to the stock response writer.
                            return smallRyeHealth;
                        }
                    }.handle(context);
                },
                // Explicit lambda instead of a method reference: RoutingContext#fail is overloaded.
                failure -> context.fail(failure));
    }

    /**
     * Supplies the health status to report for the probe served by this handler.
     *
     * @return a {@link Uni} emitting the health status
     */
    protected abstract Uni<SmallRyeHealth> getHealth();
}

View file

@ -0,0 +1,35 @@
/*
* Copyright 2023 Red Hat, Inc. and/or its affiliates
* and other contributors as indicated by the @author tags.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.keycloak.quarkus.runtime.integration.health;
import io.quarkus.arc.Arc;
import io.smallrye.health.SmallRyeHealth;
import io.smallrye.health.SmallRyeHealthReporter;
import io.smallrye.mutiny.Uni;
/**
* @author Alexander Schwartz
*/
public class ReactiveLivenessHandler extends ReactiveHealthHandler {

    /**
     * Obtains the {@link SmallRyeHealthReporter} instance from the Arc container and requests the
     * liveness status through its asynchronous API.
     *
     * @return the {@link Uni} returned by {@code SmallRyeHealthReporter#getLivenessAsync()}
     */
    @Override
    protected Uni<SmallRyeHealth> getHealth() {
        return Arc.container()
                .instance(SmallRyeHealthReporter.class)
                .get()
                .getLivenessAsync();
    }
}

View file

@ -23,15 +23,11 @@ import org.keycloak.it.junit5.extension.DistributionTest;
import org.keycloak.it.utils.KeycloakDistribution;
import static io.restassured.RestAssured.when;
import static org.hamcrest.CoreMatchers.equalTo;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;
@DistributionTest(keepAlive =true)
public class HealthDistTest {
@ -67,6 +63,28 @@ public class HealthDistTest {
.statusCode(404);
}
/**
 * Starts the server with health checks enabled and without the classic-probes flag, then verifies that
 * both probe endpoints answer successfully.
 */
@Test
@Launch({ "start-dev", "--health-enabled=true", "--metrics-enabled=true" })
void testNonBlockingProbes() {
// The liveness endpoint must answer with HTTP 200.
when().get("/health/live").then()
.statusCode(200);
// The readiness endpoint must answer with HTTP 200 and report exactly one check: the database connections check.
when().get("/health/ready").then()
.statusCode(200)
.body("checks[0].name", equalTo("Keycloak database connections health check"))
.body("checks.size()", equalTo(1));
}
/**
 * Starts the server with health checks enabled and {@code --health-classic-probes-enabled=true}, then
 * verifies that both probe endpoints answer with the same results as in the default setup.
 */
@Test
@Launch({ "start-dev", "--health-enabled=true", "--metrics-enabled=true", "--health-classic-probes-enabled=true" })
void testBlockingProbes() {
// The liveness endpoint must answer with HTTP 200.
when().get("/health/live").then()
.statusCode(200);
// The readiness endpoint must answer with HTTP 200 and report exactly one check: the database connections check.
when().get("/health/ready").then()
.statusCode(200)
.body("checks[0].name", equalTo("Keycloak database connections health check"))
.body("checks.size()", equalTo(1));
}
@Test
void testUsingRelativePath(KeycloakDistribution distribution) {
for (String relativePath : List.of("/auth", "/auth/", "auth")) {