From e776c99ede510659929c8b4cef40bf15dbf6e8f2 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Fri, 26 May 2023 12:21:09 +0200 Subject: [PATCH] Merge network and database error retryability detection functions so that connection attempts will also be re-tried on RDBMS-specific errors, e.g. Postgres' 57P03 (the database system is starting up), not to crash. On the other hand, SQL operations which are safe to retry on SQL errors are also safe to retry on network errors. --- pkg/icingadb/db.go | 63 +++------------------------------------------- pkg/icingadb/ha.go | 2 +- pkg/retry/retry.go | 50 ++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 61 deletions(-) diff --git a/pkg/icingadb/db.go b/pkg/icingadb/db.go index a9eed7fe..0beca180 100644 --- a/pkg/icingadb/db.go +++ b/pkg/icingadb/db.go @@ -2,9 +2,7 @@ package icingadb import ( "context" - sqlDriver "database/sql/driver" "fmt" - "github.com/go-sql-driver/mysql" "github.com/icinga/icingadb/internal" "github.com/icinga/icingadb/pkg/backoff" "github.com/icinga/icingadb/pkg/com" @@ -15,7 +13,6 @@ import ( "github.com/icinga/icingadb/pkg/retry" "github.com/icinga/icingadb/pkg/utils" "github.com/jmoiron/sqlx" - "github.com/lib/pq" "github.com/pkg/errors" "golang.org/x/sync/errgroup" "golang.org/x/sync/semaphore" @@ -339,7 +336,7 @@ func (db *DB) BulkExec( return nil }, - IsRetryable, + retry.Retryable, backoff.NewExponentialWithJitter(1*time.Millisecond, 1*time.Second), retry.Settings{}, ) @@ -404,7 +401,7 @@ func (db *DB) NamedBulkExec( return nil }, - IsRetryable, + retry.Retryable, backoff.NewExponentialWithJitter(1*time.Millisecond, 1*time.Second), retry.Settings{}, ) @@ -477,7 +474,7 @@ func (db *DB) NamedBulkExecTx( return nil }, - IsRetryable, + retry.Retryable, backoff.NewExponentialWithJitter(1*time.Millisecond, 1*time.Second), retry.Settings{}, ) @@ -674,57 +671,3 @@ func (db *DB) log(ctx context.Context, query string, counter *com.Counter) perio db.logger.Debugf("Finished executing %q with %d rows in %s", query, counter.Total(), tick.Elapsed) })) } - -// IsRetryable checks whether the given error is retryable. -func IsRetryable(err error) bool { - if errors.Is(err, sqlDriver.ErrBadConn) { - return true - } - - if errors.Is(err, mysql.ErrInvalidConn) { - return true - } - - var e *mysql.MySQLError - if errors.As(err, &e) { - switch e.Number { - case 1053, 1205, 1213, 2006: - // 1053: Server shutdown in progress - // 1205: Lock wait timeout - // 1213: Deadlock found when trying to get lock - // 2006: MySQL server has gone away - return true - default: - return false - } - } - - var pe *pq.Error - if errors.As(err, &pe) { - switch pe.Code { - case "08000", // connection_exception - "08006", // connection_failure - "08001", // sqlclient_unable_to_establish_sqlconnection - "08004", // sqlserver_rejected_establishment_of_sqlconnection - "40001", // serialization_failure - "40P01", // deadlock_detected - "54000", // program_limit_exceeded - "55006", // object_in_use - "55P03", // lock_not_available - "57P01", // admin_shutdown - "57P02", // crash_shutdown - "57P03", // cannot_connect_now - "58000", // system_error - "58030", // io_error - "XX000": // internal_error - return true - default: - if strings.HasPrefix(string(pe.Code), "53") { - // Class 53 - Insufficient Resources - return true - } - } - } - - return false -} diff --git a/pkg/icingadb/ha.go b/pkg/icingadb/ha.go index 0a81bb5b..d16c6c0e 100644 --- a/pkg/icingadb/ha.go +++ b/pkg/icingadb/ha.go @@ -319,7 +319,7 @@ func (h *HA) realize(ctx context.Context, s *icingaredisv1.IcingaStatus, t *type return nil }, - IsRetryable, + retry.Retryable, backoff.NewExponentialWithJitter(time.Millisecond*256, time.Second*3), retry.Settings{ OnError: func(_ time.Duration, attempt uint64, err, lastErr error) { diff --git a/pkg/retry/retry.go b/pkg/retry/retry.go index 0364bbcf..f0a1477f 100644 --- a/pkg/retry/retry.go +++ b/pkg/retry/retry.go @@ -2,9 +2,13 @@ package retry import ( "context" + "database/sql/driver" + "github.com/go-sql-driver/mysql" "github.com/icinga/icingadb/pkg/backoff" + "github.com/lib/pq" "github.com/pkg/errors" "net" + "strings" "syscall" "time" ) @@ -130,5 +134,51 @@ func Retryable(err error) bool { return true } + if errors.Is(err, driver.ErrBadConn) { + return true + } + if errors.Is(err, mysql.ErrInvalidConn) { + return true + } + + var e *mysql.MySQLError + if errors.As(err, &e) { + switch e.Number { + case 1053, 1205, 1213, 2006: + // 1053: Server shutdown in progress + // 1205: Lock wait timeout + // 1213: Deadlock found when trying to get lock + // 2006: MySQL server has gone away + return true + default: + return false + } + } + + var pe *pq.Error + if errors.As(err, &pe) { + switch pe.Code { + case "08000", // connection_exception + "08006", // connection_failure + "08001", // sqlclient_unable_to_establish_sqlconnection + "08004", // sqlserver_rejected_establishment_of_sqlconnection + "40001", // serialization_failure + "40P01", // deadlock_detected + "54000", // program_limit_exceeded + "55006", // object_in_use + "55P03", // lock_not_available + "57P01", // admin_shutdown + "57P02", // crash_shutdown + "57P03", // cannot_connect_now + "58000", // system_error + "58030", // io_error + "XX000": // internal_error + return true + default: + // Class 53 - Insufficient Resources + return strings.HasPrefix(string(pe.Code), "53") + } + } + return false }