diff --git a/configure.ac b/configure.ac
index a7ed87a9de..090fcb1ba4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2226,6 +2226,9 @@ if test $ol_enable_balancer != no ; then
 	else
 		AC_MSG_ERROR([You need libevent 2.1 or later with DNS support to build the load balancer])
 	fi
+
+	AC_CHECK_LIB(m, pow, [BALANCER_LIBS="$BALANCER_LIBS -lm"],
+		[AC_MSG_ERROR([could not locate pow -lm])])
 fi
 
 dnl ----------------------------------------------------------------
diff --git a/doc/man/man5/lloadd.conf.5 b/doc/man/man5/lloadd.conf.5
index a25b9044d9..c7295b4c06 100644
--- a/doc/man/man5/lloadd.conf.5
+++ b/doc/man/man5/lloadd.conf.5
@@ -762,6 +762,23 @@ defaults to 0 and such backends have a slight chance of being selected even when
 a non-zero weight backend is configured in the tier. The selection process is
 along the lines of
 .BR RFC2782 .
+.TP
+.B bestof
+Like with
+.BI weighted ,
+backends accept the
+.B weight=
+option. Average latency multiplied by
+.B weight
+is measured over time. The selection process chooses 2 backends at random,
+compares their weighted latencies and the backend with a better (lower) score
+is tried. If the backend is not available (or is busy), the other backend is
+tried, then backends are chosen in a round-robin order.
+
+Note that unlike
+.BI weighted ,
+the higher the weight, the higher the "effective" latency and lower the chance
+a backend is selected.
 
 .SH BACKEND OPTIONS
 
diff --git a/servers/lloadd/Makefile.in b/servers/lloadd/Makefile.in
index 2de57f885e..43cc947a73 100644
--- a/servers/lloadd/Makefile.in
+++ b/servers/lloadd/Makefile.in
@@ -21,7 +21,7 @@ NT_OBJS = nt_svc.o ../../libraries/liblutil/slapdmsg.res
 
 SRCS = backend.c bind.c config.c connection.c client.c \
 		daemon.c epoch.c extended.c init.c operation.c \
-		tier.c tier_roundrobin.c tier_weighted.c \
+		tier.c tier_roundrobin.c tier_weighted.c tier_bestof.c \
 		upstream.c libevent_support.c \
 		$(@PLAT@_SRCS)
 
diff --git a/servers/lloadd/daemon.c b/servers/lloadd/daemon.c
index fe84454ae6..5120c1d65d 100644
--- a/servers/lloadd/daemon.c
+++ b/servers/lloadd/daemon.c
@@ -96,6 +96,7 @@ struct event_base *daemon_base = NULL;
 struct evdns_base *dnsbase;
 
 struct event *lload_timeout_event;
+struct event *lload_stats_event;
 
 /*
  * global lload statistics. Not mutex protected to preserve performance -
@@ -1234,6 +1235,7 @@ lloadd_daemon( struct event_base *daemon_base )
     LloadTier *tier;
     struct event_base *base;
     struct event *event;
+    struct timeval second = { 1, 0 };
 
     assert( daemon_base != NULL );
 
@@ -1282,6 +1284,16 @@ lloadd_daemon( struct event_base *daemon_base )
         }
     }
 
+    event = event_new( daemon_base, -1, EV_TIMEOUT|EV_PERSIST,
+            lload_tiers_update, NULL );
+    if ( !event ) {
+        Debug( LDAP_DEBUG_ANY, "lloadd: "
+                "failed to allocate stats update event\n" );
+        return -1;
+    }
+    lload_stats_event = event;
+    event_add( event, &second );
+
     event = evtimer_new( daemon_base, operations_timeout, event_self_cbarg() );
     if ( !event ) {
         Debug( LDAP_DEBUG_ANY, "lloadd: "
diff --git a/servers/lloadd/lload.h b/servers/lloadd/lload.h
index 0bfdcb7d08..ec6c9d0c0f 100644
--- a/servers/lloadd/lload.h
+++ b/servers/lloadd/lload.h
@@ -252,6 +252,7 @@ struct lload_tier_type {
     LloadTierConfigCb *tier_config;
     LloadTierBackendConfigCb *tier_backend_config;
     LloadTierCb *tier_startup;
+    LloadTierCb *tier_update;
     LloadTierResetCb *tier_reset;
     LloadTierCb *tier_destroy;
 
@@ -308,6 +309,7 @@ struct LloadBackend {
 
     LloadTier *b_tier;
 
+    time_t b_last_update;
     uintptr_t b_fitness;
     int b_weight;
 
diff --git a/servers/lloadd/proto-lload.h b/servers/lloadd/proto-lload.h
index e8903ba383..8cccacb20c 100644
--- a/servers/lloadd/proto-lload.h
+++ b/servers/lloadd/proto-lload.h
@@ -203,6 +203,7 @@ LDAP_SLAPD_F (int) tier_reset( LloadTier *tier, int shutdown );
 LDAP_SLAPD_F (int) tier_destroy( LloadTier *tier );
 LDAP_SLAPD_F (void) lload_tiers_shutdown( void );
 LDAP_SLAPD_F (void) lload_tiers_reset( int shutdown );
+LDAP_SLAPD_F (void) lload_tiers_update( evutil_socket_t s, short what, void *arg );
 LDAP_SLAPD_F (void) lload_tiers_destroy( void );
 LDAP_SLAPD_F (struct lload_tier_type *) lload_tier_find( char *type );
 
diff --git a/servers/lloadd/tier.c b/servers/lloadd/tier.c
index e4970add8c..da08962004 100644
--- a/servers/lloadd/tier.c
+++ b/servers/lloadd/tier.c
@@ -117,8 +117,25 @@ lload_tiers_reset( int shutdown )
     }
 }
 
+/*
+ * Timer callback: let every tier whose type implements tier_update refresh
+ * its state. The event arguments are unused.
+ */
+void
+lload_tiers_update( evutil_socket_t s, short what, void *arg )
+{
+    LloadTier *tier;
+
+    LDAP_STAILQ_FOREACH ( tier, &tiers, t_next ) {
+        if ( tier->t_type.tier_update ) {
+            tier->t_type.tier_update( tier );
+        }
+    }
+}
+
 extern struct lload_tier_type roundrobin_tier;
 extern struct lload_tier_type weighted_tier;
+extern struct lload_tier_type bestof_tier;
 
 struct {
     char *name;
@@ -126,6 +143,7 @@ struct {
 } tier_types[] = {
     { "roundrobin", &roundrobin_tier },
     { "weighted", &weighted_tier },
+    { "bestof", &bestof_tier },
 
     { NULL }
 };
diff --git a/servers/lloadd/tier_bestof.c b/servers/lloadd/tier_bestof.c
new file mode 100644
index 0000000000..ef96dd3a15
--- /dev/null
+++ b/servers/lloadd/tier_bestof.c
@@ -0,0 +1,375 @@
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2019 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <ac/string.h>
+#include <math.h>
+
+#include "lload.h"
+#include "lutil.h"
+
+static LloadTierInit bestof_init;
+static LloadTierBackendConfigCb bestof_backend_options;
+static LloadTierBackendCb bestof_add_backend;
+static LloadTierBackendCb bestof_remove_backend;
+static LloadTierSelect bestof_select;
+
+struct lload_tier_type bestof_tier;
+
+/*
+ * Linear Congruential Generator - we don't need
+ * high quality randomness, and we don't want to
+ * interfere with anyone else's use of srand().
+ *
+ * The PRNG here cycles thru 941,955 numbers.
+ */
+static float bestof_seed;
+
+/* Seed the private generator with seed scaled by RAND_MAX. */
+static void
+bestof_srand( int seed )
+{
+    bestof_seed = (float)seed / (float)RAND_MAX;
+}
+
+/*
+ * Advance the private generator and return its new state, a value in [0, 1).
+ * NOTE(review): bestof_seed is updated without any locking - confirm that
+ * concurrent callers are acceptable.
+ */
+static float
+bestof_rand( void )
+{
+    float val = 9821.0 * bestof_seed + .211327;
+    bestof_seed = val - (int)val;
+    return bestof_seed;
+}
+
+/*
+ * Three-way comparison of two backends by b_fitness: negative, zero or
+ * positive when left scores lower than, equal to or higher than right.
+ */
+static int
+bestof_cmp( const void *left, const void *right )
+{
+    const LloadBackend *l = left;
+    const LloadBackend *r = right;
+
+    /*
+     * b_fitness is a uintptr_t, so returning the difference would wrap
+     * around and get truncated to int, possibly with the wrong sign.
+     */
+    return ( l->b_fitness > r->b_fitness ) - ( l->b_fitness < r->b_fitness );
+}
+
+/* Allocate an empty bestof tier and seed the private generator. */
+LloadTier *
+bestof_init( void )
+{
+    LloadTier *tier;
+
+    tier = ch_calloc( 1, sizeof(LloadTier) );
+
+    tier->t_type = bestof_tier;
+    ldap_pvt_thread_mutex_init( &tier->t_mutex );
+    LDAP_CIRCLEQ_INIT( &tier->t_backends );
+
+    bestof_srand( rand() );
+
+    return tier;
+}
+
+/*
+ * Append b to the tier's backend ring; the first backend added also becomes
+ * the starting point kept in t_private.
+ */
+int
+bestof_add_backend( LloadTier *tier, LloadBackend *b )
+{
+    assert( b->b_tier == tier );
+
+    LDAP_CIRCLEQ_INSERT_TAIL( &tier->t_backends, b, b_next );
+    if ( !tier->t_private ) {
+        tier->t_private = b;
+    }
+    tier->t_nbackends++;
+    return LDAP_SUCCESS;
+}
+
+/*
+ * Unlink b from the tier's backend ring and move the starting point in
+ * t_private to its successor, or to NULL if b was the only backend.
+ * Both the tier and the backend mutex must be held by the caller.
+ */
+static int
+bestof_remove_backend( LloadTier *tier, LloadBackend *b )
+{
+    LloadBackend *next = LDAP_CIRCLEQ_LOOP_NEXT( &tier->t_backends, b, b_next );
+
+    assert_locked( &tier->t_mutex );
+    assert_locked( &b->b_mutex );
+
+    assert( b->b_tier == tier );
+    assert( tier->t_private );
+
+    LDAP_CIRCLEQ_REMOVE( &tier->t_backends, b, b_next );
+    LDAP_CIRCLEQ_ENTRY_INIT( b, b_next );
+
+    if ( b == next ) {
+        tier->t_private = NULL;
+    } else {
+        tier->t_private = next;
+    }
+    tier->t_nbackends--;
+
+    return LDAP_SUCCESS;
+}
+
+/*
+ * Parse a "weight=<number>" backend option into b->b_weight.
+ * Returns 0 on success, 1 if arg is a different option or cannot be parsed.
+ * NOTE(review): not referenced by bestof_tier below - confirm whether it
+ * should be hooked up as .tier_backend_config.
+ */
+static int
+bestof_backend_options( LloadTier *tier, LloadBackend *b, char *arg )
+{
+    struct berval weight = BER_BVC("weight=");
+    unsigned long l;
+
+    if ( !strncasecmp( arg, weight.bv_val, weight.bv_len ) ) {
+        if ( lutil_atoulx( &l, &arg[weight.bv_len], 0 ) != 0 ) {
+            Debug( LDAP_DEBUG_ANY, "bestof_backend_options: "
+                    "cannot parse %s as weight\n",
+                    arg );
+            return 1;
+        }
+        b->b_weight = l;
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * connections_walk() callback: reset the connection's operation count and
+ * operation time to zero and add the previous values to the two-element
+ * uintptr_t array passed in arg (count first, time second).
+ */
+static int
+connection_collect_stats( LloadConnection *c, void *arg )
+{
+    uintptr_t count, diff, *stats = arg;
+
+    count = __atomic_exchange_n(
+            &( c )->c_operation_count, 0, __ATOMIC_RELAXED );
+    diff = __atomic_exchange_n( &( c )->c_operation_time, 0, __ATOMIC_RELAXED );
+
+    stats[0] += count;
+    stats[1] += diff;
+
+    return LDAP_SUCCESS;
+}
+
+/*
+ * Periodic update: for every backend with a non-zero weight, fold
+ * weight * operation time / operation count, as collected from its
+ * connections since the last update, into the rolling b_fitness score.
+ */
+static int
+bestof_update( LloadTier *tier )
+{
+    LloadBackend *b, *first, *next;
+    time_t now = slap_get_time();
+
+    checked_lock( &tier->t_mutex );
+    first = b = tier->t_private;
+    checked_unlock( &tier->t_mutex );
+
+    if ( !first ) return LDAP_SUCCESS;
+
+    do {
+        time_t steps;
+        checked_lock( &b->b_mutex );
+
+        /*
+         * Keep the difference as a time_t: it can be as large as the
+         * current timestamp if b_last_update was never set, which must not
+         * be squeezed into an int.
+         */
+        steps = now - b->b_last_update;
+        if ( b->b_weight && steps > 0 ) {
+            uintptr_t stats[2] = { 0, 0 };
+            float factor = 1;
+
+            connections_walk(
+                    &b->b_mutex, &b->b_conns, connection_collect_stats, stats );
+
+            /* Smear values over time - rolling average */
+            if ( stats[0] ) {
+                float fitness = (float)b->b_weight * stats[1];
+
+                /* Stretch factor accordingly favouring the latest value */
+                if ( steps > 10 ) {
+                    factor = 0; /* No recent data */
+                } else if ( steps > 1 ) {
+                    factor =
+                            1 / ( pow( ( 1 / (float)factor ) + 1, steps ) - 1 );
+                }
+
+                b->b_fitness = ( factor * b->b_fitness + fitness / stats[0] ) /
+                        ( factor + 1 );
+                b->b_last_update = now;
+            }
+        }
+
+        next = LDAP_CIRCLEQ_LOOP_NEXT( &tier->t_backends, b, b_next );
+        checked_unlock( &b->b_mutex );
+        b = next;
+    } while ( b != first );
+
+    return LDAP_SUCCESS;
+}
+
+/*
+ * Select a connection for op: pick two distinct backends at random and try
+ * the one with the lower b_fitness first, then the other one. If neither
+ * provides a connection, fall back to trying every backend in round-robin
+ * order starting from t_private.
+ */
+int
+bestof_select(
+        LloadTier *tier,
+        LloadOperation *op,
+        LloadConnection **cp,
+        int *res,
+        char **message )
+{
+    LloadBackend *first, *next, *b, *b0, *b1, *candidates[2];
+    int result = 0, rc = 0, n;
+    int i0, i1, i = 0;
+
+    checked_lock( &tier->t_mutex );
+    first = b0 = b = tier->t_private;
+    /* Read the backend count under the same lock as the starting point */
+    n = tier->t_nbackends;
+    checked_unlock( &tier->t_mutex );
+
+    if ( !first ) return rc;
+
+    if ( n < 2 ) {
+        goto fallback;
+    }
+
+    /* Pick two backend indices at random */
+    i0 = bestof_rand() * n;
+    i1 = bestof_rand() * ( n - 1 );
+    if ( i1 >= i0 ) {
+        i1 += 1;
+    } else {
+        int tmp = i0;
+        i0 = i1;
+        i1 = tmp;
+    }
+    assert( i0 < i1 );
+
+    /*
+     * FIXME: use a static array in t_private so we don't have to do any of
+     * this
+     */
+    for ( i = 0; i < i1; i++ ) {
+        if ( i == i0 ) {
+            b0 = b;
+        }
+        checked_lock( &b->b_mutex );
+        next = LDAP_CIRCLEQ_LOOP_NEXT( &tier->t_backends, b, b_next );
+        checked_unlock( &b->b_mutex );
+        b = next;
+    }
+    b1 = b;
+    assert( b0 != b1 );
+
+    /* Better (lower) score goes first, b1 wins a tie */
+    if ( bestof_cmp( b0, b1 ) < 0 ) {
+        candidates[0] = b0;
+        candidates[1] = b1;
+    } else {
+        candidates[0] = b1;
+        candidates[1] = b0;
+    }
+
+    for ( i = 0; i < 2; i++ ) {
+        b = candidates[i];
+
+        checked_lock( &b->b_mutex );
+        result = backend_select( b, op, cp, res, message );
+        checked_unlock( &b->b_mutex );
+
+        rc |= result;
+        if ( result && *cp ) {
+            checked_lock( &tier->t_mutex );
+            tier->t_private = LDAP_CIRCLEQ_LOOP_NEXT(
+                    &tier->t_backends, (*cp)->c_backend, b_next );
+            checked_unlock( &tier->t_mutex );
+            return rc;
+        }
+    }
+
+    /* Preferred backends deemed unusable, do a round robin from scratch */
+    b = first;
+fallback:
+    do {
+        checked_lock( &b->b_mutex );
+        next = LDAP_CIRCLEQ_LOOP_NEXT( &tier->t_backends, b, b_next );
+
+        rc = backend_select( b, op, cp, res, message );
+        checked_unlock( &b->b_mutex );
+
+        if ( rc && *cp ) {
+            /*
+             * Round-robin step:
+             * Rotate the queue to put this backend at the end. The race here
+             * is acceptable.
+             */
+            checked_lock( &tier->t_mutex );
+            tier->t_private = next;
+            checked_unlock( &tier->t_mutex );
+            return rc;
+        }
+
+        b = next;
+    } while ( b != first );
+
+    return rc;
+}
+
+struct lload_tier_type bestof_tier = {
+    .tier_name = "bestof",
+
+    .tier_init = bestof_init,
+    .tier_startup = tier_startup,
+    .tier_update = bestof_update,
+    .tier_reset = tier_reset,
+    .tier_destroy = tier_destroy,
+
+    .tier_oc = BER_BVC("olcBkLloadTierConfig"),
+    .tier_backend_oc = BER_BVC("olcBkLloadBackendConfig"),
+
+    .tier_add_backend = bestof_add_backend,
+    .tier_remove_backend = bestof_remove_backend,
+
+    .tier_select = bestof_select,
+};
diff --git a/servers/lloadd/tier_roundrobin.c b/servers/lloadd/tier_roundrobin.c
index 0d924c7748..a959043922 100644
--- a/servers/lloadd/tier_roundrobin.c
+++ b/servers/lloadd/tier_roundrobin.c
@@ -69,6 +69,7 @@ roundrobin_remove_backend( LloadTier *tier, LloadBackend *b )
             tier->t_private = NULL;
         }
     }
+    tier->t_nbackends--;
 
     return LDAP_SUCCESS;
 }
diff --git a/tests/data/lloadd.conf b/tests/data/lloadd.conf
index afd60bccfd..9d9d2a8005 100644
--- a/tests/data/lloadd.conf
+++ b/tests/data/lloadd.conf
@@ -27,7 +27,7 @@ bindconf
 tier roundrobin
 # empty tier
 
-tier weighted
+tier bestof
 backend-server uri=@URI2@
     numconns=3
     bindconns=3