diff --git a/builtin/logical/pki/backend.go b/builtin/logical/pki/backend.go index 819ff3e55a..51b1903189 100644 --- a/builtin/logical/pki/backend.go +++ b/builtin/logical/pki/backend.go @@ -3,11 +3,11 @@ package pki import ( "context" "strings" - "sync" "time" "github.com/hashicorp/vault/sdk/framework" "github.com/hashicorp/vault/sdk/logical" + "github.com/hashicorp/vault/vault" ) // Factory creates a new backend implementing the logical.Backend interface @@ -75,6 +75,7 @@ func Backend(conf *logical.BackendConfig) *backend { pathFetchListCerts(&b), pathRevoke(&b), pathTidy(&b), + pathTidyStatus(&b), }, Secrets: []*framework.Secret{ @@ -86,6 +87,7 @@ func Backend(conf *logical.BackendConfig) *backend { b.crlLifetime = time.Hour * 72 b.tidyCASGuard = new(uint32) + b.tidyStatus = &tidyStatus{state: tidyStatusInactive} b.storage = conf.StorageView return &b @@ -96,8 +98,36 @@ type backend struct { storage logical.Storage crlLifetime time.Duration - revokeStorageLock sync.RWMutex + revokeStorageLock vault.DeadlockRWMutex tidyCASGuard *uint32 + + tidyStatusLock vault.DeadlockRWMutex + tidyStatus *tidyStatus +} + +type tidyStatusState int + +const ( + tidyStatusInactive tidyStatusState = iota + tidyStatusStarted + tidyStatusFinished + tidyStatusError +) + +type tidyStatus struct { + // Parameters used to initiate the operation + safetyBuffer int + tidyCertStore bool + tidyRevokedCerts bool + + // Status + state tidyStatusState + err error + timeStarted time.Time + timeFinished time.Time + message string + certStoreDeletedCount uint + revokedCertDeletedCount uint } const backendHelp = ` diff --git a/builtin/logical/pki/backend_test.go b/builtin/logical/pki/backend_test.go index 657aadab3b..c9e092e070 100644 --- a/builtin/logical/pki/backend_test.go +++ b/builtin/logical/pki/backend_test.go @@ -12,6 +12,7 @@ import ( "crypto/x509" "crypto/x509/pkix" "encoding/base64" + "encoding/json" "encoding/pem" "fmt" "io/ioutil" @@ -29,6 +30,7 @@ import ( "testing" "time" + 
"github.com/armon/go-metrics" "github.com/fatih/structs" "github.com/go-test/deep" "github.com/hashicorp/go-secure-stdlib/strutil" @@ -3092,6 +3094,22 @@ func setCerts() { } func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) { + // Use a ridiculously long time to minimize the chance + // that we have to deal with more than one interval. + // InMemSink rounds down to an interval boundary rather than + // starting one at the time of initialization. + inmemSink := metrics.NewInmemSink( + 1000000*time.Hour, + 2000000*time.Hour) + + metricsConf := metrics.DefaultConfig("") + metricsConf.EnableHostname = false + metricsConf.EnableHostnameLabel = false + metricsConf.EnableServiceLabel = false + metricsConf.EnableTypePrefix = false + + metrics.NewGlobal(metricsConf, inmemSink) + // Enable PKI secret engine coreConfig := &vault.CoreConfig{ LogicalBackends: map[string]logical.Factory{ @@ -3243,6 +3261,91 @@ func TestBackend_RevokePlusTidy_Intermediate(t *testing.T) { // Sleep a bit to make sure we're past the safety buffer time.Sleep(2 * time.Second) + // Issue a tidy-status on /pki + { + tidyStatus, err := client.Logical().Read("pki/tidy-status") + if err != nil { + t.Fatal(err) + } + expectedData := map[string]interface{}{ + "safety_buffer": json.Number("1"), + "tidy_cert_store": true, + "tidy_revoked_certs": true, + "state": "Finished", + "error": nil, + "time_started": nil, + "time_finished": nil, + "message": nil, + "cert_store_deleted_count": json.Number("1"), + "revoked_cert_deleted_count": json.Number("1"), + } + // Let's copy the times from the response so that we can use deep.Equal() + timeStarted, ok := tidyStatus.Data["time_started"] + if !ok || timeStarted == "" { + t.Fatal("Expected tidy status response to include a value for time_started") + } + expectedData["time_started"] = timeStarted + timeFinished, ok := tidyStatus.Data["time_finished"] + if !ok || timeFinished == "" { + t.Fatal("Expected tidy status response to include a value for time_finished") 
+ } + expectedData["time_finished"] = timeFinished + + if diff := deep.Equal(expectedData, tidyStatus.Data); diff != nil { + t.Fatal(diff) + } + } + // Check the tidy metrics + { + // Map of gauges to expected value + expectedGauges := map[string]float32{ + "secrets.pki.tidy.cert_store_current_entry": 0, + "secrets.pki.tidy.cert_store_total_entries": 1, + "secrets.pki.tidy.revoked_cert_current_entry": 0, + "secrets.pki.tidy.revoked_cert_total_entries": 1, + "secrets.pki.tidy.start_time_epoch": 0, + } + // Map of counters to the sum of the metrics for that counter + expectedCounters := map[string]float64{ + "secrets.pki.tidy.cert_store_deleted_count": 1, + "secrets.pki.tidy.revoked_cert_deleted_count": 1, + "secrets.pki.tidy.success": 2, + // Note that "secrets.pki.tidy.failure" won't be in the captured metrics + } + + // If the metrics span more than one interval, skip the checks + intervals := inmemSink.Data() + if len(intervals) == 1 { + interval := inmemSink.Data()[0] + + for gauge, value := range expectedGauges { + if _, ok := interval.Gauges[gauge]; !ok { + t.Fatalf("Expected metrics to include a value for gauge %s", gauge) + } + if value != interval.Gauges[gauge].Value { + t.Fatalf("Expected value metric %s to be %f but got %f", gauge, value, interval.Gauges[gauge].Value) + } + + } + for counter, value := range expectedCounters { + if _, ok := interval.Counters[counter]; !ok { + t.Fatalf("Expected metrics to include a value for couter %s", counter) + } + if value != interval.Counters[counter].Sum { + t.Fatalf("Expected the sum of metric %s to be %f but got %f", counter, value, interval.Counters[counter].Sum) + } + } + + tidyDuration, ok := interval.Samples["secrets.pki.tidy.duration"] + if !ok { + t.Fatal("Expected metrics to include a value for sample secrets.pki.tidy.duration") + } + if tidyDuration.Count <= 0 { + t.Fatalf("Expected metrics to have count > 0 for sample secrets.pki.tidy.duration, but got %d", tidyDuration.Count) + } + } + } + req = 
client.NewRequest("GET", "/v1/pki/crl") resp, err = client.RawRequest(req) if err != nil { diff --git a/builtin/logical/pki/ca_test.go b/builtin/logical/pki/ca_test.go index f4c4d12ea0..5579ddb8f9 100644 --- a/builtin/logical/pki/ca_test.go +++ b/builtin/logical/pki/ca_test.go @@ -558,6 +558,32 @@ func runSteps(t *testing.T, rootB, intB *backend, client *api.Client, rootName, } } + verifyTidyStatus := func(expectedCertStoreDeleteCount int, expectedRevokedCertDeletedCount int) { + tidyStatus, err := client.Logical().Read(rootName+"tidy-status") + if err != nil { + t.Fatal(err) + } + + if tidyStatus.Data["state"] != "Finished" { + t.Fatalf("Expected tidy operation to be finished, but tidy-status reports its state is %v", tidyStatus.Data) + } + + var count int64 + if count, err = tidyStatus.Data["cert_store_deleted_count"].(json.Number).Int64(); err != nil { + t.Fatal(err) + } + if int64(expectedCertStoreDeleteCount) != count { + t.Fatalf("Expected %d for cert_store_deleted_count, but got %d", expectedCertStoreDeleteCount, count) + } + + if count, err = tidyStatus.Data["revoked_cert_deleted_count"].(json.Number).Int64(); err != nil { + t.Fatal(err) + } + if int64(expectedRevokedCertDeletedCount) != count { + t.Fatalf("Expected %d for revoked_cert_deleted_count, but got %d", expectedRevokedCertDeletedCount, count) + } + } + // Validate current state of revoked certificates verifyRevocation(t, intSerialNumber, true) @@ -585,6 +611,8 @@ func runSteps(t *testing.T, rootB, intB *backend, client *api.Client, rootName, // Check to make sure we still find the cert and see it on the CRL verifyRevocation(t, intSerialNumber, true) + + verifyTidyStatus(0, 0) } // Run with both values set false, nothing should happen @@ -606,6 +634,8 @@ func runSteps(t *testing.T, rootB, intB *backend, client *api.Client, rootName, // Check to make sure we still find the cert and see it on the CRL verifyRevocation(t, intSerialNumber, true) + + verifyTidyStatus(0, 0) } // Run with a short safety 
buffer and both set to true, both should be cleared @@ -627,6 +657,9 @@ func runSteps(t *testing.T, rootB, intB *backend, client *api.Client, rootName, // Check to make sure we still find the cert and see it on the CRL verifyRevocation(t, intSerialNumber, false) + + verifyTidyStatus(1, 1) } } } + diff --git a/builtin/logical/pki/path_tidy.go b/builtin/logical/pki/path_tidy.go index f655360b16..cb3f9fe991 100644 --- a/builtin/logical/pki/path_tidy.go +++ b/builtin/logical/pki/path_tidy.go @@ -8,6 +8,7 @@ import ( "sync/atomic" "time" + "github.com/armon/go-metrics" "github.com/hashicorp/vault/sdk/framework" "github.com/hashicorp/vault/sdk/helper/consts" "github.com/hashicorp/vault/sdk/logical" @@ -15,7 +16,7 @@ import ( func pathTidy(b *backend) *framework.Path { return &framework.Path{ - Pattern: "tidy", + Pattern: "tidy$", Fields: map[string]*framework.FieldSchema{ "tidy_cert_store": { Type: framework.TypeBool, @@ -45,8 +46,11 @@ Defaults to 72 hours.`, }, }, - Callbacks: map[logical.Operation]framework.OperationFunc{ - logical.UpdateOperation: b.pathTidyWrite, + Operations: map[logical.Operation]framework.OperationHandler{ + logical.UpdateOperation: &framework.PathOperation{ + Callback: b.pathTidyWrite, + ForwardPerformanceStandby: true, + }, }, HelpSynopsis: pathTidyHelpSyn, @@ -54,12 +58,21 @@ Defaults to 72 hours.`, } } -func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { - // If we are a performance standby forward the request to the active node - if b.System().ReplicationState().HasState(consts.ReplicationPerformanceStandby) { - return nil, logical.ErrReadOnly +func pathTidyStatus(b *backend) *framework.Path { + return &framework.Path{ + Pattern: "tidy-status$", + Operations: map[logical.Operation]framework.OperationHandler{ + logical.ReadOperation: &framework.PathOperation{ + Callback: b.pathTidyStatusRead, + ForwardPerformanceStandby: true, + }, + }, + HelpSynopsis: 
pathTidyStatusHelpSyn, + HelpDescription: pathTidyStatusHelpDesc, } +} +func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { safetyBuffer := d.Get("safety_buffer").(int) tidyCertStore := d.Get("tidy_cert_store").(bool) tidyRevokedCerts := d.Get("tidy_revoked_certs").(bool) @@ -86,6 +99,8 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr go func() { defer atomic.StoreUint32(b.tidyCASGuard, 0) + b.tidyStatusStart(safetyBuffer, tidyCertStore, tidyRevokedCerts || tidyRevocationList) + // Don't cancel when the original client request goes away ctx = context.Background() @@ -98,7 +113,12 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr return fmt.Errorf("error fetching list of certs: %w", err) } - for _, serial := range serials { + serialCount := len(serials) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_total_entries"}, float32(serialCount)) + for i, serial := range serials { + b.tidyStatusMessage(fmt.Sprintf("Tidying certificate store: checking entry %d of %d", i, serialCount)) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "cert_store_current_entry"}, float32(i)) + certEntry, err := req.Storage.Get(ctx, "certs/"+serial) if err != nil { return fmt.Errorf("error fetching certificate %q: %w", serial, err) @@ -109,6 +129,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { return fmt.Errorf("error deleting nil entry with serial %s: %w", serial, err) } + b.tidyStatusIncCertStoreCount() continue } @@ -117,6 +138,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { return fmt.Errorf("error deleting entry with nil value with serial %s: %w", serial, err) } + b.tidyStatusIncCertStoreCount() continue } @@ -129,6 +151,7 
@@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr if err := req.Storage.Delete(ctx, "certs/"+serial); err != nil { return fmt.Errorf("error deleting serial %q from storage: %w", serial, err) } + b.tidyStatusIncCertStoreCount() } } } @@ -144,8 +167,14 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr return fmt.Errorf("error fetching list of revoked certs: %w", err) } + revokedSerialsCount := len(revokedSerials) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_total_entries"}, float32(revokedSerialsCount)) + var revInfo revocationInfo - for _, serial := range revokedSerials { + for i, serial := range revokedSerials { + b.tidyStatusMessage(fmt.Sprintf("Tidying revoked certificates: checking certificate %d of %d", i, len(revokedSerials))) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "revoked_cert_current_entry"}, float32(i)) + revokedEntry, err := req.Storage.Get(ctx, "revoked/"+serial) if err != nil { return fmt.Errorf("unable to fetch revoked cert with serial %q: %w", serial, err) @@ -156,6 +185,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil { return fmt.Errorf("error deleting nil revoked entry with serial %s: %w", serial, err) } + b.tidyStatusIncRevokedCertCount() continue } @@ -164,6 +194,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr if err := req.Storage.Delete(ctx, "revoked/"+serial); err != nil { return fmt.Errorf("error deleting revoked entry with nil value with serial %s: %w", serial, err) } + b.tidyStatusIncRevokedCertCount() continue } @@ -189,6 +220,7 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr return fmt.Errorf("error deleting serial %q from store when tidying revoked: %w", serial, err) } rebuildCRL = true + b.tidyStatusIncRevokedCertCount() } } @@ -204,7 +236,9 @@ func (b 
*backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr if err := doTidy(); err != nil { logger.Error("error running tidy", "error", err) - return + b.tidyStatusStop(err) + } else { + b.tidyStatusStop(nil) } }() @@ -213,6 +247,121 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr return logical.RespondWithStatusCode(resp, req, http.StatusAccepted) } +func (b *backend) pathTidyStatusRead(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + // If this node is a performance secondary return an ErrReadOnly so that the request gets forwarded, + // but only if the PKI backend is not a local mount. + if b.System().ReplicationState().HasState(consts.ReplicationPerformanceSecondary) && !b.System().LocalMount() { + return nil, logical.ErrReadOnly + } + + b.tidyStatusLock.RLock() + defer b.tidyStatusLock.RUnlock() + + resp := &logical.Response{ + Data: map[string]interface{}{ + "safety_buffer": nil, + "tidy_cert_store": nil, + "tidy_revoked_certs": nil, + "state": "Inactive", + "error": nil, + "time_started": nil, + "time_finished": nil, + "message": nil, + "cert_store_deleted_count": nil, + "revoked_cert_deleted_count": nil, + }, + } + + if b.tidyStatus.state == tidyStatusInactive { + return resp, nil + } + + resp.Data["safety_buffer"] = b.tidyStatus.safetyBuffer + resp.Data["tidy_cert_store"] = b.tidyStatus.tidyCertStore + resp.Data["tidy_revoked_certs"] = b.tidyStatus.tidyRevokedCerts + resp.Data["time_started"] = b.tidyStatus.timeStarted + resp.Data["message"] = b.tidyStatus.message + resp.Data["cert_store_deleted_count"] = b.tidyStatus.certStoreDeletedCount + resp.Data["revoked_cert_deleted_count"] = b.tidyStatus.revokedCertDeletedCount + + switch(b.tidyStatus.state) { + case tidyStatusStarted: + resp.Data["state"] = "Running" + case tidyStatusFinished: + resp.Data["state"] = "Finished" + resp.Data["time_finished"] = b.tidyStatus.timeFinished + resp.Data["message"] = nil + 
case tidyStatusError: + resp.Data["state"] = "Error" + resp.Data["time_finished"] = b.tidyStatus.timeFinished + resp.Data["error"] = b.tidyStatus.err.Error() + // Don't clear the message so that it serves as a hint about when + // the error occurred. + } + + return resp, nil +} + +func (b *backend) tidyStatusStart(safetyBuffer int, tidyCertStore, tidyRevokedCerts bool) { + b.tidyStatusLock.Lock() + defer b.tidyStatusLock.Unlock() + + b.tidyStatus = &tidyStatus{ + safetyBuffer: safetyBuffer, + tidyCertStore: tidyCertStore, + tidyRevokedCerts: tidyRevokedCerts, + state: tidyStatusStarted, + timeStarted: time.Now(), + } + + metrics.SetGauge([]string{"secrets", "pki", "tidy", "start_time_epoch"}, float32(b.tidyStatus.timeStarted.Unix())) +} + +func (b *backend) tidyStatusStop(err error) { + b.tidyStatusLock.Lock() + defer b.tidyStatusLock.Unlock() + + b.tidyStatus.timeFinished = time.Now() + b.tidyStatus.err = err + if err == nil { + b.tidyStatus.state = tidyStatusFinished + } else { + b.tidyStatus.state = tidyStatusError + } + + metrics.MeasureSince([]string{"secrets", "pki", "tidy", "duration"}, b.tidyStatus.timeStarted) + metrics.SetGauge([]string{"secrets", "pki", "tidy", "start_time_epoch"}, 0) + metrics.IncrCounter([]string{"secrets", "pki", "tidy", "cert_store_deleted_count"}, float32(b.tidyStatus.certStoreDeletedCount)) + metrics.IncrCounter([]string{"secrets", "pki", "tidy", "revoked_cert_deleted_count"}, float32(b.tidyStatus.revokedCertDeletedCount)) + + if err != nil { + metrics.IncrCounter([]string{"secrets", "pki", "tidy", "failure"}, 1) + } else { + metrics.IncrCounter([]string{"secrets", "pki", "tidy", "success"}, 1) + } +} + +func (b *backend) tidyStatusMessage(msg string) { + b.tidyStatusLock.Lock() + defer b.tidyStatusLock.Unlock() + + b.tidyStatus.message = msg +} + +func (b *backend) tidyStatusIncCertStoreCount() { + b.tidyStatusLock.Lock() + defer b.tidyStatusLock.Unlock() + + b.tidyStatus.certStoreDeletedCount++ +} + +func (b *backend) 
tidyStatusIncRevokedCertCount() { + b.tidyStatusLock.Lock() + defer b.tidyStatusLock.Unlock() + + b.tidyStatus.revokedCertDeletedCount++ +} + const pathTidyHelpSyn = ` Tidy up the backend by removing expired certificates, revocation information, or both. @@ -239,3 +388,25 @@ certificate storage or in revocation information will then be checked. If the current time, minus the value of 'safety_buffer', is greater than the expiration, it will be removed. ` + +const pathTidyStatusHelpSyn = ` +Returns the status of the tidy operation. +` + +const pathTidyStatusHelpDesc = ` +This is a read only endpoint that returns information about the current tidy +operation, or the most recent if none is currently running. + +The result includes the following fields: +* 'safety_buffer': the value of this parameter when initiating the tidy operation +* 'tidy_cert_store': the value of this parameter when initiating the tidy operation +* 'tidy_revoked_certs': the value of this parameter when initiating the tidy operation +* 'state': one of "Inactive", "Running", "Finished", "Error" +* 'error': the error message, if the operation ran into an error +* 'time_started': the time the operation started +* 'time_finished': the time the operation finished +* 'message': One of "Tidying certificate store: checking entry N of TOTAL" or + "Tidying revoked certificates: checking certificate N of TOTAL" +* 'cert_store_deleted_count': The number of certificate storage entries deleted +* 'revoked_cert_deleted_count': The number of revoked certificate entries deleted +` diff --git a/changelog/12885.txt b/changelog/12885.txt new file mode 100644 index 0000000000..4018b9317f --- /dev/null +++ b/changelog/12885.txt @@ -0,0 +1,3 @@ +```release-note:feature: +secrets/pki: Add `tidy-status` endpoint to obtain information of the current or most recent tidy operation. 
+``` diff --git a/website/content/api-docs/secret/pki.mdx b/website/content/api-docs/secret/pki.mdx index db0240ac8e..30247938ca 100644 --- a/website/content/api-docs/secret/pki.mdx +++ b/website/content/api-docs/secret/pki.mdx @@ -1606,6 +1606,55 @@ $ curl \ http://127.0.0.1:8200/v1/pki/tidy ``` +## Tidy Status + +This is a read only endpoint that returns information about the current tidy +operation, or the most recent if none are currently running. + +The result includes the following fields: +* `safety_buffer`: the value of this parameter when initiating the tidy operation +* `tidy_cert_store`: the value of this parameter when initiating the tidy operation +* `tidy_revoked_certs`: the value of this parameter when initiating the tidy operation +* `state`: one of *Inactive*, *Running*, *Finished*, *Error* +* `error`: the error message, if the operation ran into an error +* `time_started`: the time the operation started +* `time_finished`: the time the operation finished +* `message`: One of *Tidying certificate store: checking entry N of TOTAL* or + *Tidying revoked certificates: checking certificate N of TOTAL* +* `cert_store_deleted_count`: The number of certificate storage entries deleted +* `revoked_cert_deleted_count`: The number of revoked certificate entries deleted + +| Method | Path | +| :----- | :----------------- | +| `GET` | `/pki/tidy-status` | + +### Sample Request + +```shell-session +$ curl \ + --header "X-Vault-Token: ..." 
\ + --request GET \ + http://127.0.0.1:8200/v1/pki/tidy-status + +``` + +### Sample Response + +```json + "data": { + "safety_buffer": 60, + "tidy_cert_store": true, + "tidy_revoked_certs": true, + "error": null, + "message": "Tidying certificate store: checking entry 234 of 488", + "revoked_cert_deleted_count": 0, + "cert_store_deleted_count": 2, + "state": "Running", + "time_started": "2021-10-20T14:52:13.510161-04:00", + "time_finished": null + }, +``` + # Cluster Scalability Most non-introspection operations in the PKI secrets engine require a write to diff --git a/website/content/docs/internals/telemetry.mdx b/website/content/docs/internals/telemetry.mdx index 8ff1d4e252..499d8cbe8a 100644 --- a/website/content/docs/internals/telemetry.mdx +++ b/website/content/docs/internals/telemetry.mdx @@ -288,30 +288,40 @@ These metrics relate to [Vault Enterprise Replication](/docs/enterprise/replicat These metrics relate to the supported [secrets engines][secrets-engines]. -| Metric | Description | Unit | Type | -| :------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----- | :------ | -| `database.Initialize` | Time taken to initialize a database secret engine across all database secrets engines | ms | summary | -| `database..Initialize` | Time taken to initialize a database secret engine for the named database secrets engine ``, for example: `database.postgresql-prod.Initialize` | ms | summary | -| `database.Initialize.error` | Number of database secrets engine initialization operation errors across all database secrets engines | errors | counter | -| `database..Initialize.error` | Number of database secrets engine initialization operation errors for the named database secrets engine ``, for example: `database.postgresql-prod.Initialize.error` | errors | 
counter | -| `database.Close` | Time taken to close a database secret engine across all database secrets engines | ms | summary | -| `database..Close` | Time taken to close a database secret engine for the named database secrets engine ``, for example: `database.postgresql-prod.Close` | ms | summary | -| `database.Close.error` | Number of database secrets engine close operation errors across all database secrets engines | errors | counter | -| `database..Close.error` | Number of database secrets engine close operation errors for the named database secrets engine ``, for example: `database.postgresql-prod.Close.error` | errors | counter | -| `database.CreateUser` | Time taken to create a user across all database secrets engines | ms | summary | -| `database..CreateUser` | Time taken to create a user for the named database secrets engine `` | ms | summary | -| `database.CreateUser.error` | Number of user creation operation errors across all database secrets engines | errors | counter | -| `database..CreateUser.error` | Number of user creation operation errors for the named database secrets engine ``, for example: `database.postgresql-prod.CreateUser.error` | errors | counter | -| `database.RenewUser` | Time taken to renew a user across all database secrets engines | ms | summary | -| `database..RenewUser` | Time taken to renew a user for the named database secrets engine ``, for example: `database.postgresql-prod.RenewUser` | ms | summary | -| `database.RenewUser.error` | Number of user renewal operation errors across all database secrets engines | errors | counter | -| `database..RenewUser.error` | Number of user renewal operations for the named database secrets engine ``, for example: `database.postgresql-prod.RenewUser.error` | errors | counter | -| `database.RevokeUser` | Time taken to revoke a user across all database secrets engines | ms | summary | -| `database..RevokeUser` | Time taken to revoke a user for the named database secrets engine ``, for example: 
`database.postgresql-prod.RevokeUser` | ms | summary | -| `database.RevokeUser.error` | Number of user revocation operation errors across all database secrets engines | errors | counter | -| `database..RevokeUser.error` | Number of user revocation operations for the named database secrets engine ``, for example: `database.postgresql-prod.RevokeUser.error` | errors | counter | -| `vault.secret.kv.count` (cluster, namespace, mount_point) | Number of entries in each key-value secret engine. | paths | gauge | -| `vault.secret.lease.creation` (cluster, namespace, secret_engine, mount_point, creation_ttl) | Counts the number of leases created by secret engines. | leases | counter | +| Metric | Description | Unit | Type | +| :------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----- | :------ | +| `database.Initialize` | Time taken to initialize a database secret engine across all database secrets engines | ms | summary | +| `database..Initialize` | Time taken to initialize a database secret engine for the named database secrets engine ``, for example: `database.postgresql-prod.Initialize` | ms | summary | +| `database.Initialize.error` | Number of database secrets engine initialization operation errors across all database secrets engines | errors | counter | +| `database..Initialize.error` | Number of database secrets engine initialization operation errors for the named database secrets engine ``, for example: `database.postgresql-prod.Initialize.error` | errors | counter | +| `database.Close` | Time taken to close a database secret engine across all database secrets engines | ms | summary | +| `database..Close` | Time taken to close a database secret engine for the named database secrets engine ``, for example: `database.postgresql-prod.Close` | ms | summary 
| +| `database.Close.error` | Number of database secrets engine close operation errors across all database secrets engines | errors | counter | +| `database..Close.error` | Number of database secrets engine close operation errors for the named database secrets engine ``, for example: `database.postgresql-prod.Close.error` | errors | counter | +| `database.CreateUser` | Time taken to create a user across all database secrets engines | ms | summary | +| `database..CreateUser` | Time taken to create a user for the named database secrets engine `` | ms | summary | +| `database.CreateUser.error` | Number of user creation operation errors across all database secrets engines | errors | counter | +| `database..CreateUser.error` | Number of user creation operation errors for the named database secrets engine ``, for example: `database.postgresql-prod.CreateUser.error` | errors | counter | +| `database.RenewUser` | Time taken to renew a user across all database secrets engines | ms | summary | +| `database..RenewUser` | Time taken to renew a user for the named database secrets engine ``, for example: `database.postgresql-prod.RenewUser` | ms | summary | +| `database.RenewUser.error` | Number of user renewal operation errors across all database secrets engines | errors | counter | +| `database..RenewUser.error` | Number of user renewal operations for the named database secrets engine ``, for example: `database.postgresql-prod.RenewUser.error` | errors | counter | +| `database.RevokeUser` | Time taken to revoke a user across all database secrets engines | ms | summary | +| `database..RevokeUser` | Time taken to revoke a user for the named database secrets engine ``, for example: `database.postgresql-prod.RevokeUser` | ms | summary | +| `database.RevokeUser.error` | Number of user revocation operation errors across all database secrets engines | errors | counter | +| `database..RevokeUser.error` | Number of user revocation operations for the named database secrets engine ``, 
for example: `database.postgresql-prod.RevokeUser.error` | errors | counter | +| `secrets.pki.tidy.cert_store_current_entry` | The index of the current entry in the certificate store being verified by the tidy operation | entry index | gauge | +| `secrets.pki.tidy.cert_store_deleted_count` | Number of entries deleted from the certificate store | entry | counter | +| `secrets.pki.tidy.cert_store_total_entries` | Number of entries in the certificate store to verify during the tidy operation | entry | gauge | +| `secrets.pki.tidy.duration` | Duration of time taken by the PKI tidy operation | ms | summary | +| `secrets.pki.tidy.failure` | Number of times the PKI tidy operation has not completed due to errors | operations | counter | +| `secrets.pki.tidy.revoked_cert_current_entry` | The index of the current revoked certificate entry in the certificate store being verified by the tidy operation | entry index | gauge | +| `secrets.pki.tidy.revoked_cert_deleted_count` | Number of entries deleted from the certificate store for revoked certificates | entry | counter | +| `secrets.pki.tidy.revoked_cert_total_entries` | Number of entries in the certificate store for revoked certificates to verify during the tidy operation | entry | gauge | +| `secrets.pki.tidy.start_time_epoch` | Start time (as seconds since Jan 1 1970) when the PKI tidy operation is active, 0 otherwise | seconds | gauge | +| `secrets.pki.tidy.success` | Number of times the PKI tidy operation has completed successfully | operations | counter | +| `vault.secret.kv.count` (cluster, namespace, mount_point) | Number of entries in each key-value secret engine. | paths | gauge | +| `vault.secret.lease.creation` (cluster, namespace, secret_engine, mount_point, creation_ttl) | Counts the number of leases created by secret engines. | leases | counter | ## Storage Backend Metrics