mirror of
https://github.com/Icinga/icingadb.git
synced 2026-02-18 18:18:00 -05:00
SLA reporting: history retention for SLA tables
This commit is contained in:
parent
d119be0da5
commit
19170ecbcf
7 changed files with 162 additions and 75 deletions
|
|
@ -133,11 +133,12 @@ func run() int {
|
|||
ods := overdue.NewSync(db, rc, logs.GetChildLogger("overdue-sync"))
|
||||
ret := history.NewRetention(
|
||||
db,
|
||||
cmd.Config.HistoryRetention.Days,
|
||||
cmd.Config.HistoryRetention.Interval,
|
||||
cmd.Config.HistoryRetention.Count,
|
||||
cmd.Config.HistoryRetention.Options,
|
||||
logs.GetChildLogger("history-retention"),
|
||||
cmd.Config.Retention.HistoryDays,
|
||||
cmd.Config.Retention.SlaDays,
|
||||
cmd.Config.Retention.Interval,
|
||||
cmd.Config.Retention.Count,
|
||||
cmd.Config.Retention.Options,
|
||||
logs.GetChildLogger("retention"),
|
||||
)
|
||||
|
||||
sig := make(chan os.Signal, 1)
|
||||
|
|
|
|||
|
|
@ -33,15 +33,17 @@ logging:
|
|||
# dump-signals:
|
||||
# heartbeat:
|
||||
# high-availability:
|
||||
# history-retention:
|
||||
# history-sync:
|
||||
# overdue-sync:
|
||||
# redis:
|
||||
# retention:
|
||||
# runtime-updates:
|
||||
|
||||
history-retention:
|
||||
# Number of days to retain historical data. By default, historical data is retained forever.
|
||||
# days:
|
||||
retention:
|
||||
# Number of days to retain full historical data. By default, historical data is retained forever.
|
||||
# history-days:
|
||||
# Number of days to retain historical data for SLA reporting. By default, it is retained forever.
|
||||
# sla-days:
|
||||
# Map of history category to number of days to retain its data in order to
|
||||
# enable retention only for specific categories or to override the number that has been configured in history-days.
|
||||
options:
|
||||
|
|
|
|||
|
|
@ -57,10 +57,10 @@ database | Database connection status and queries.
|
|||
dump-signals | Dump signals received from Icinga.
|
||||
heartbeat | Icinga heartbeats received through Redis.
|
||||
high-availability | Manages responsibility of Icinga DB instances.
|
||||
history-retention | Deletes historical data that exceed their configured retention period.
|
||||
history-sync | Synchronization of history entries from Redis to MySQL.
|
||||
overdue-sync | Calculation and synchronization of the overdue status of checkables.
|
||||
redis | Redis connection status and queries.
|
||||
retention | Deletes historical data that exceed their configured retention period.
|
||||
runtime-updates | Runtime updates of config objects after the initial config synchronization.
|
||||
|
||||
### Duration String <a id="duration-string"></a>
|
||||
|
|
@ -68,12 +68,15 @@ runtime-updates | Runtime updates of config objects after the initial c
|
|||
A duration string is a sequence of decimal numbers and a unit suffix, such as `"20s"`.
|
||||
Valid units are `"ms"`, `"s"`, `"m"` and `"h"`.
|
||||
|
||||
## History Retention <a id="configuration-history-retention"></a>
|
||||
## Retention <a id="configuration-retention"></a>
|
||||
|
||||
By default, no historical data is deleted, which means that the longer the data is retained, the more disk space is required to store it.
|
||||
History retention is an optional feature that allows you to limit the number of days that historical data is available for each history category.
|
||||
There are separate options for the full history tables used to display history information in the web interface and
|
||||
SLA tables, which store the minimal information required for SLA reporting, allowing you to keep this information for longer with a smaller storage footprint.
|
||||
|
||||
| Option | Description |
|
||||
|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| days | **Optional.** Number of days to retain historical data for all history categories. Use `options` in order to enable retention only for specific categories or to override the retention days configured here. |
|
||||
| options | **Optional.** Map of history category to number of days to retain its data. Available categories are `acknowledgement`, `comment`, `downtime`, `flapping`, `notification` and `state`. |
|
||||
| Option | Description |
|
||||
|--------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| history-days | **Optional.** Number of days to retain historical data for all history categories. Use `options` in order to enable retention only for specific categories or to override the retention days configured here. |
|
||||
| sla-days | **Optional.** Number of days to retain historical data for SLA reporting. |
|
||||
| options | **Optional.** Map of history category to number of days to retain its data. Available categories are `acknowledgement`, `comment`, `downtime`, `flapping`, `notification` and `state`. Retention for the SLA tables cannot be set here; use `sla-days` instead. |
|
||||
|
|
|
|||
|
|
@ -14,10 +14,10 @@ import (
|
|||
|
||||
// Config defines Icinga DB config.
|
||||
type Config struct {
|
||||
Database Database `yaml:"database"`
|
||||
Redis Redis `yaml:"redis"`
|
||||
Logging Logging `yaml:"logging"`
|
||||
HistoryRetention HistoryRetention `yaml:"history-retention"`
|
||||
Database Database `yaml:"database"`
|
||||
Redis Redis `yaml:"redis"`
|
||||
Logging Logging `yaml:"logging"`
|
||||
Retention Retention `yaml:"retention"`
|
||||
}
|
||||
|
||||
// Validate checks constraints in the supplied configuration and returns an error if they are violated.
|
||||
|
|
@ -31,7 +31,7 @@ func (c *Config) Validate() error {
|
|||
if err := c.Logging.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.HistoryRetention.Validate(); err != nil {
|
||||
if err := c.Retention.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,17 +6,18 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
// HistoryRetention defines configuration for history retention.
|
||||
type HistoryRetention struct {
|
||||
Days uint64 `yaml:"days"`
|
||||
Interval time.Duration `yaml:"interval" default:"1h"`
|
||||
Count uint64 `yaml:"count" default:"5000"`
|
||||
Options history.RetentionOptions `yaml:"options"`
|
||||
// Retention defines configuration for history and SLA retention.
|
||||
type Retention struct {
|
||||
HistoryDays uint64 `yaml:"history-days"`
|
||||
SlaDays uint64 `yaml:"sla-days"`
|
||||
Interval time.Duration `yaml:"interval" default:"1h"`
|
||||
Count uint64 `yaml:"count" default:"5000"`
|
||||
Options history.RetentionOptions `yaml:"options"`
|
||||
}
|
||||
|
||||
// Validate checks constraints in the supplied retention configuration and
|
||||
// returns an error if they are violated.
|
||||
func (r *HistoryRetention) Validate() error {
|
||||
func (r *Retention) Validate() error {
|
||||
if r.Interval <= 0 {
|
||||
return errors.New("retention interval must be positive")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,39 +11,85 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
type RetentionType int
|
||||
|
||||
const (
|
||||
RetentionHistory RetentionType = iota
|
||||
RetentionSla
|
||||
)
|
||||
|
||||
type retentionStatement struct {
|
||||
icingadb.CleanupStmt
|
||||
RetentionType
|
||||
Category string
|
||||
}
|
||||
|
||||
// RetentionStatements lists the cleanup statements for the history and SLA tables, each with its category and retention type.
|
||||
var RetentionStatements = map[string]icingadb.CleanupStmt{
|
||||
"acknowledgement": {
|
||||
var RetentionStatements = []retentionStatement{{
|
||||
RetentionType: RetentionHistory,
|
||||
Category: "acknowledgement",
|
||||
CleanupStmt: icingadb.CleanupStmt{
|
||||
Table: "acknowledgement_history",
|
||||
PK: "id",
|
||||
Column: "clear_time",
|
||||
},
|
||||
"comment": {
|
||||
}, {
|
||||
RetentionType: RetentionHistory,
|
||||
Category: "comment",
|
||||
CleanupStmt: icingadb.CleanupStmt{
|
||||
Table: "comment_history",
|
||||
PK: "comment_id",
|
||||
Column: "remove_time",
|
||||
},
|
||||
"downtime": {
|
||||
}, {
|
||||
RetentionType: RetentionHistory,
|
||||
Category: "downtime",
|
||||
CleanupStmt: icingadb.CleanupStmt{
|
||||
Table: "downtime_history",
|
||||
PK: "downtime_id",
|
||||
Column: "end_time",
|
||||
},
|
||||
"flapping": {
|
||||
}, {
|
||||
RetentionType: RetentionHistory,
|
||||
Category: "flapping",
|
||||
CleanupStmt: icingadb.CleanupStmt{
|
||||
Table: "flapping_history",
|
||||
PK: "id",
|
||||
Column: "end_time",
|
||||
},
|
||||
"notification": {
|
||||
}, {
|
||||
RetentionType: RetentionHistory,
|
||||
Category: "notification",
|
||||
CleanupStmt: icingadb.CleanupStmt{
|
||||
Table: "notification_history",
|
||||
PK: "id",
|
||||
Column: "send_time",
|
||||
},
|
||||
"state": {
|
||||
}, {
|
||||
RetentionType: RetentionHistory,
|
||||
Category: "state",
|
||||
CleanupStmt: icingadb.CleanupStmt{
|
||||
Table: "state_history",
|
||||
PK: "id",
|
||||
Column: "event_time",
|
||||
},
|
||||
}
|
||||
}, {
|
||||
RetentionType: RetentionSla,
|
||||
Category: "sla_downtime",
|
||||
CleanupStmt: icingadb.CleanupStmt{
|
||||
Table: "sla_history_downtime",
|
||||
PK: "downtime_id",
|
||||
Column: "downtime_end",
|
||||
},
|
||||
}, {
|
||||
RetentionType: RetentionSla,
|
||||
Category: "sla_state",
|
||||
CleanupStmt: icingadb.CleanupStmt{
|
||||
Table: "sla_history_state",
|
||||
PK: "id",
|
||||
Column: "event_time",
|
||||
},
|
||||
}}
|
||||
|
||||
// RetentionOptions defines the non-default mapping of history categories with their retention period in days.
|
||||
type RetentionOptions map[string]uint64
|
||||
|
|
@ -51,8 +97,15 @@ type RetentionOptions map[string]uint64
|
|||
// Validate checks constraints in the supplied retention options and
|
||||
// returns an error if they are violated.
|
||||
func (o RetentionOptions) Validate() error {
|
||||
allowedCategories := make(map[string]struct{})
|
||||
for _, stmt := range RetentionStatements {
|
||||
if stmt.RetentionType == RetentionHistory {
|
||||
allowedCategories[stmt.Category] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
for category := range o {
|
||||
if _, ok := RetentionStatements[category]; !ok {
|
||||
if _, ok := allowedCategories[category]; !ok {
|
||||
return errors.Errorf("invalid key %s for history retention", category)
|
||||
}
|
||||
}
|
||||
|
|
@ -62,23 +115,28 @@ func (o RetentionOptions) Validate() error {
|
|||
|
||||
// Retention deletes rows from history and SLA tables that exceed their configured retention period.
|
||||
type Retention struct {
|
||||
db *icingadb.DB
|
||||
logger *logging.Logger
|
||||
days uint64
|
||||
interval time.Duration
|
||||
count uint64
|
||||
options RetentionOptions
|
||||
db *icingadb.DB
|
||||
logger *logging.Logger
|
||||
historyDays uint64
|
||||
slaDays uint64
|
||||
interval time.Duration
|
||||
count uint64
|
||||
options RetentionOptions
|
||||
}
|
||||
|
||||
// NewRetention returns a new Retention.
|
||||
func NewRetention(db *icingadb.DB, days uint64, interval time.Duration, count uint64, options RetentionOptions, logger *logging.Logger) *Retention {
|
||||
func NewRetention(
|
||||
db *icingadb.DB, historyDays uint64, slaDays uint64, interval time.Duration,
|
||||
count uint64, options RetentionOptions, logger *logging.Logger,
|
||||
) *Retention {
|
||||
return &Retention{
|
||||
db: db,
|
||||
logger: logger,
|
||||
days: days,
|
||||
interval: interval,
|
||||
count: count,
|
||||
options: options,
|
||||
db: db,
|
||||
logger: logger,
|
||||
historyDays: historyDays,
|
||||
slaDays: slaDays,
|
||||
interval: interval,
|
||||
count: count,
|
||||
options: options,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -94,32 +152,39 @@ func (r *Retention) StartWithCallback(ctx context.Context, c func(table string,
|
|||
|
||||
errs := make(chan error, 1)
|
||||
|
||||
for category, stmt := range RetentionStatements {
|
||||
days, ok := r.options[category]
|
||||
if !ok {
|
||||
days = r.days
|
||||
for _, stmt := range RetentionStatements {
|
||||
var days uint64
|
||||
switch stmt.RetentionType {
|
||||
case RetentionHistory:
|
||||
if d, ok := r.options[stmt.Category]; ok {
|
||||
days = d
|
||||
} else {
|
||||
days = r.historyDays
|
||||
}
|
||||
case RetentionSla:
|
||||
days = r.slaDays
|
||||
}
|
||||
|
||||
if days < 1 {
|
||||
r.logger.Debugf("Skipping history retention for category %s", category)
|
||||
r.logger.Debugf("Skipping history retention for category %s", stmt.Category)
|
||||
continue
|
||||
}
|
||||
|
||||
r.logger.Debugw(
|
||||
fmt.Sprintf("Starting history retention for category %s", category),
|
||||
fmt.Sprintf("Starting history retention for category %s", stmt.Category),
|
||||
zap.Uint64("count", r.count),
|
||||
zap.Duration("interval", r.interval),
|
||||
zap.Uint64("retention-days", days),
|
||||
)
|
||||
|
||||
category := category
|
||||
stmt := stmt
|
||||
periodic.Start(ctx, r.interval, func(tick periodic.Tick) {
|
||||
olderThan := tick.Time.AddDate(0, 0, -int(days))
|
||||
|
||||
r.logger.Debugf("Cleaning up historical data for category %s older than %s", category, olderThan)
|
||||
r.logger.Debugf("Cleaning up historical data for category %s from table %s older than %s",
|
||||
stmt.Category, stmt.Table, olderThan)
|
||||
|
||||
rs, err := r.db.CleanupOlderThan(ctx, stmt, r.count, olderThan)
|
||||
rs, err := r.db.CleanupOlderThan(ctx, stmt.CleanupStmt, r.count, olderThan)
|
||||
if err != nil {
|
||||
select {
|
||||
case errs <- err:
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import (
|
|||
"github.com/jmoiron/sqlx"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
|
@ -22,7 +23,8 @@ func TestCleanupAndRetention(t *testing.T) {
|
|||
t.Cleanup(func() { _ = db.Close() })
|
||||
|
||||
reten := retention{
|
||||
Days: 7,
|
||||
HistoryDays: 7,
|
||||
SlaDays: 30,
|
||||
Options: map[string]int{
|
||||
"acknowledgement": 0, // No cleanup.
|
||||
"comment": 1,
|
||||
|
|
@ -31,6 +33,16 @@ func TestCleanupAndRetention(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
daysForCategory := func(category string) int {
|
||||
if strings.HasPrefix(category, "sla_") {
|
||||
return reten.SlaDays
|
||||
} else if d, ok := reten.Options[category]; ok {
|
||||
return d
|
||||
} else {
|
||||
return reten.HistoryDays
|
||||
}
|
||||
}
|
||||
|
||||
rowsToDelete := 10000
|
||||
rowsToSpare := 1000
|
||||
|
||||
|
|
@ -38,11 +50,7 @@ func TestCleanupAndRetention(t *testing.T) {
|
|||
err := dropNotNullColumns(db, stmt)
|
||||
assert.NoError(t, err)
|
||||
|
||||
retentionDays, ok := reten.Options[category]
|
||||
if !ok {
|
||||
retentionDays = reten.Days
|
||||
}
|
||||
|
||||
retentionDays := daysForCategory(category)
|
||||
start := time.Now().AddDate(0, 0, -retentionDays).Add(-1 * time.Millisecond * time.Duration(rowsToDelete))
|
||||
startMilli := start.UnixMilli()
|
||||
|
||||
|
|
@ -75,18 +83,14 @@ func TestCleanupAndRetention(t *testing.T) {
|
|||
i.Reload()
|
||||
waitForDumpDoneSignal(t, r, 20*time.Second, 100*time.Millisecond)
|
||||
config, err := yaml.Marshal(struct {
|
||||
Retention retention `yaml:"history-retention"`
|
||||
Retention retention `yaml:"retention"`
|
||||
}{reten})
|
||||
assert.NoError(t, err)
|
||||
it.IcingaDbInstanceT(t, r, rdb, services.WithIcingaDbConfig(string(config)))
|
||||
|
||||
eventually.Assert(t, func(t require.TestingT) {
|
||||
for category, stmt := range retentionStatements {
|
||||
retentionDays, ok := reten.Options[category]
|
||||
if !ok {
|
||||
retentionDays = reten.Days
|
||||
}
|
||||
|
||||
retentionDays := daysForCategory(category)
|
||||
threshold := time.Now().AddDate(0, 0, -retentionDays)
|
||||
thresholdMilli := threshold.UnixMilli()
|
||||
|
||||
|
|
@ -106,10 +110,10 @@ func TestCleanupAndRetention(t *testing.T) {
|
|||
|
||||
if retentionDays == 0 {
|
||||
// No cleanup.
|
||||
assert.Equal(t, rowsToDelete+rowsToSpare, rowsLeft+rowsSpared, "all rows should still be there")
|
||||
assert.Equal(t, rowsToDelete+rowsToSpare, rowsLeft+rowsSpared, "all rows should still be there for %s", category)
|
||||
} else {
|
||||
assert.Equal(t, 0, rowsLeft, "rows left in retention period")
|
||||
assert.Equal(t, rowsToSpare, rowsSpared, "rows spared")
|
||||
assert.Equal(t, 0, rowsLeft, "rows left in retention period for %s", category)
|
||||
assert.Equal(t, rowsToSpare, rowsSpared, "rows spared for %s", category)
|
||||
}
|
||||
}
|
||||
}, time.Minute, time.Second)
|
||||
|
|
@ -122,8 +126,9 @@ type cleanupStmt struct {
|
|||
}
|
||||
|
||||
type retention struct {
|
||||
Days int `yaml:"days"`
|
||||
Options map[string]int `yaml:"options"`
|
||||
HistoryDays int `yaml:"history-days"`
|
||||
SlaDays int `yaml:"sla-days"`
|
||||
Options map[string]int `yaml:"options"`
|
||||
}
|
||||
|
||||
var retentionStatements = map[string]cleanupStmt{
|
||||
|
|
@ -157,6 +162,16 @@ var retentionStatements = map[string]cleanupStmt{
|
|||
PK: "id",
|
||||
Column: "event_time",
|
||||
},
|
||||
"sla_downtime": {
|
||||
Table: "sla_history_downtime",
|
||||
PK: "downtime_id",
|
||||
Column: "downtime_end",
|
||||
},
|
||||
"sla_state": {
|
||||
Table: "sla_history_state",
|
||||
PK: "id",
|
||||
Column: "event_time",
|
||||
},
|
||||
}
|
||||
|
||||
// dropNotNullColumns drops all columns with a NOT NULL constraint that are not
|
||||
|
|
|
|||
Loading…
Reference in a new issue