Merge pull request #368 from Icinga/bugfix/icinga-db-does-not-exit-when-reconnecting-to-the-database-350

On shutdown: give up HA handover after 3s, not 5m
This commit is contained in:
Alexander Aleksandrovič Klimov 2021-09-22 16:22:51 +02:00 committed by GitHub
commit 585d1e6bb5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 7 deletions

View file

@ -19,6 +19,7 @@ import (
"os/signal"
"sync"
"syscall"
"time"
)
const (
@ -67,9 +68,17 @@ func run() int {
heartbeat := icingaredis.NewHeartbeat(ctx, rc, logger)
ha := icingadb.NewHA(ctx, db, heartbeat, logger)
// Closing ha on exit ensures that this instance retracts its heartbeat
// from the database so that another instance can take over immediately.
defer ha.Close()
defer func() {
// Give up after 3s instead of the default 5m, so that shutdown doesn't hang for 5m if the DB is down.
ctx, cancelCtx := context.WithTimeout(context.Background(), 3*time.Second)
ha.Close(ctx)
cancelCtx()
}()
s := icingadb.NewSync(db, rc, logger)
hs := history.NewSync(db, rc, logger)
rt := icingadb.NewRuntimeUpdates(db, rc, logger)

View file

@ -61,14 +61,14 @@ func NewHA(ctx context.Context, db *DB, heartbeat *icingaredis.Heartbeat, logger
return ha
}
// Close implements the io.Closer interface.
func (h *HA) Close() error {
// Close shuts h down.
func (h *HA) Close(ctx context.Context) error {
// Cancel h's own context (h.ctx), not the ctx passed to Close.
h.cancelCtx()
// Wait until the controller loop has ended.
<-h.Done()
// Remove our instance from the database.
h.removeInstance()
h.removeInstance(ctx)
// And return an error, if any.
return h.Err()
}
@ -259,11 +259,11 @@ func (h *HA) realize(s *icingaredisv1.IcingaStatus, t *types.UnixMilli, shouldLo
return nil
}
func (h *HA) removeInstance() {
func (h *HA) removeInstance(ctx context.Context) {
h.logger.Debugw("Removing our row from icingadb_instance", zap.String("instance_id", hex.EncodeToString(h.instanceId)))
// Intentionally not using a context here as this is a cleanup task and h.ctx is already cancelled.
// Intentionally not using h.ctx here as it's already cancelled.
query := "DELETE FROM icingadb_instance WHERE id = ?"
_, err := h.db.Exec(query, h.instanceId)
_, err := h.db.ExecContext(ctx, query, h.instanceId)
if err != nil {
h.logger.Warnw("Could not remove instance from database", zap.Error(err), zap.String("query", query))
}