diff --git a/changelog/18615.txt b/changelog/18615.txt
new file mode 100644
index 0000000000..2aa4b32a4c
--- /dev/null
+++ b/changelog/18615.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+core: fix issue when attempting to re-bootstrap HA when using Raft for HA but not for storage
+```
\ No newline at end of file
diff --git a/vault/external_tests/raftha/raft_ha_test.go b/vault/external_tests/raftha/raft_ha_test.go
index 3f8c57d535..bbeb78eb39 100644
--- a/vault/external_tests/raftha/raft_ha_test.go
+++ b/vault/external_tests/raftha/raft_ha_test.go
@@ -14,6 +14,7 @@ import (
 	vaulthttp "github.com/hashicorp/vault/http"
 	"github.com/hashicorp/vault/sdk/helper/logging"
 	"github.com/hashicorp/vault/vault"
+	"github.com/stretchr/testify/require"
 )
 
 func TestRaft_HA_NewCluster(t *testing.T) {
@@ -54,6 +55,20 @@ func TestRaft_HA_NewCluster(t *testing.T) {
 	})
 }
 
+// TestRaftHA_Recover_Cluster tests that we can recover data and re-bootstrap a cluster
+// that was created with raft HA enabled but is not using raft as the storage backend.
+func TestRaftHA_Recover_Cluster(t *testing.T) {
+	logger := logging.NewVaultLogger(hclog.Debug).Named(t.Name())
+	t.Run("file", func(t *testing.T) {
+		physBundle := teststorage.MakeFileBackend(t, logger)
+		testRaftHARecoverCluster(t, physBundle, logger)
+	})
+	t.Run("inmem", func(t *testing.T) {
+		physBundle := teststorage.MakeInmemBackend(t, logger)
+		testRaftHARecoverCluster(t, physBundle, logger)
+	})
+}
+
 func testRaftHANewCluster(t *testing.T, bundler teststorage.PhysicalBackendBundler, addClientCerts bool) {
 	var conf vault.CoreConfig
 	opts := vault.TestClusterOptions{HandlerFunc: vaulthttp.Handler}
@@ -117,6 +132,95 @@ func testRaftHANewCluster(t *testing.T, bundler teststorage.PhysicalBackendBundl
 	}
 }
 
+// testRaftHARecoverCluster creates a raft HA cluster and stores a test secret in a KVv2 mount.
+// It then simulates an outage by destroying the cluster while keeping the storage backend.
+// It creates a new cluster with the same storage backend and ensures that we can recover using
+// sys/storage/raft/bootstrap. Finally, it checks that the new cluster
+// is functional and no data was lost: we can read the test secret from the KVv2 mount.
+func testRaftHARecoverCluster(t *testing.T, physBundle *vault.PhysicalBackendBundle, logger hclog.Logger) {
+	opts := vault.TestClusterOptions{
+		HandlerFunc: vaulthttp.Handler,
+		// We're not testing the HA, only that it can be recovered. No need for multiple cores.
+		NumCores: 1,
+	}
+
+	haStorage, haCleanup := teststorage.MakeReusableRaftHAStorage(t, logger, opts.NumCores, physBundle)
+	defer haCleanup()
+	haStorage.Setup(nil, &opts)
+	cluster := vault.NewTestCluster(t, nil, &opts)
+
+	var (
+		clusterBarrierKeys [][]byte
+		clusterRootToken   string
+	)
+	clusterBarrierKeys = cluster.BarrierKeys
+	clusterRootToken = cluster.RootToken
+	leaderCore := cluster.Cores[0]
+	testhelpers.EnsureCoreUnsealed(t, cluster, leaderCore)
+
+	leaderClient := cluster.Cores[0].Client
+	leaderClient.SetToken(clusterRootToken)
+
+	// Mount a KVv2 backend to store the test data
+	err := leaderClient.Sys().Mount("kv", &api.MountInput{
+		Type: "kv-v2",
+	})
+	require.NoError(t, err)
+
+	kvData := map[string]interface{}{
+		"data": map[string]interface{}{
+			"kittens": "awesome",
+		},
+	}
+
+	// Store the test data in the KVv2 backend
+	_, err = leaderClient.Logical().Write("kv/data/test_known_data", kvData)
+	require.NoError(t, err)
+
+	// Delete the current cluster, but keep the storage backend so we can recover the cluster.
+	cluster.Cleanup()
+
+	// The storage backend still holds the KVv2 mount and the test data.
+	// We now create a new raft HA cluster on top of the same storage backend
+	// and ensure we can recover to a working Vault cluster without losing that data.
+
+	opts = vault.TestClusterOptions{
+		HandlerFunc: vaulthttp.Handler,
+		// We're not testing the HA, only that it can be recovered. No need for multiple cores.
+		NumCores: 1,
+		// It's already initialized, as we kept the same storage backend.
+		SkipInit: true,
+	}
+	haStorage, haCleanup = teststorage.MakeReusableRaftHAStorage(t, logger, opts.NumCores, physBundle)
+	defer haCleanup()
+	haStorage.Setup(nil, &opts)
+	clusterRestored := vault.NewTestCluster(t, nil, &opts)
+
+	clusterRestored.BarrierKeys = clusterBarrierKeys
+	clusterRestored.RootToken = clusterRootToken
+	leaderCoreRestored := clusterRestored.Cores[0]
+
+	testhelpers.EnsureCoresUnsealed(t, clusterRestored)
+
+	leaderClientRestored := clusterRestored.Cores[0].Client
+
+	// We now re-bootstrap the cluster, which resets the TLS keyring.
+	_, err = leaderClientRestored.Logical().Write("sys/storage/raft/bootstrap", nil)
+	require.NoError(t, err)
+
+	vault.TestWaitActive(t, leaderCoreRestored.Core)
+	// The core should be active and the cluster in a working state. We should
+	// be able to read the data from the KVv2 backend.
+	leaderClientRestored.SetToken(clusterRootToken)
+	secretRaw, err := leaderClientRestored.Logical().Read("kv/data/test_known_data")
+	require.NoError(t, err)
+
+	data := secretRaw.Data["data"]
+	dataAsMap := data.(map[string]interface{})
+	require.NotNil(t, dataAsMap)
+	require.Equal(t, "awesome", dataAsMap["kittens"])
+}
+
 func TestRaft_HA_ExistingCluster(t *testing.T) {
 	t.Parallel()
 	conf := vault.CoreConfig{
diff --git a/vault/raft.go b/vault/raft.go
index 97ced8f089..7d34bef99c 100644
--- a/vault/raft.go
+++ b/vault/raft.go
@@ -751,7 +751,12 @@ func (c *Core) raftCreateTLSKeyring(ctx context.Context) (*raft.TLSKeyring, erro
 	}
 
 	if raftTLSEntry != nil {
+		// With Raft storage, an existing keyring means the cluster was already
+		// bootstrapped, so this is an error; with non-Raft storage and Raft HA,
+		// we can ignore it, as the keyring will need to be remade.
+		if _, usingRaftStorage := c.underlyingPhysical.(*raft.RaftBackend); usingRaftStorage {
+			return nil, fmt.Errorf("TLS keyring already present")
+		}
 	}
 
 	raftTLS, err := raft.GenerateTLSKey(c.secureRandomReader)
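Reviewer note: the heart of the fix is the type assertion on `c.underlyingPhysical` — an existing TLS keyring is treated as an error only when Raft is also the storage backend. Below is a minimal, self-contained sketch of that guard, using hypothetical stand-ins (`Backend`, `RaftBackend`, `FileBackend`, `checkExistingKeyring`) rather than Vault's real `physical.Backend` types:

```go
package main

import "fmt"

// Backend is a hypothetical stand-in for Vault's physical.Backend interface.
type Backend interface {
	Name() string
}

// RaftBackend and FileBackend stand in for raft.RaftBackend and a non-Raft
// storage backend, respectively. Illustration only.
type RaftBackend struct{}

func (*RaftBackend) Name() string { return "raft" }

type FileBackend struct{}

func (*FileBackend) Name() string { return "file" }

// checkExistingKeyring mirrors the patched logic in raftCreateTLSKeyring:
// finding a TLS keyring already in storage is fatal only when Raft is the
// storage backend, since that implies the cluster was already bootstrapped.
// With Raft used solely for HA, the stale keyring is ignored so that a new
// one can be generated during re-bootstrap.
func checkExistingKeyring(underlying Backend) error {
	if _, usingRaftStorage := underlying.(*RaftBackend); usingRaftStorage {
		return fmt.Errorf("TLS keyring already present")
	}
	return nil
}

func main() {
	fmt.Println(checkExistingKeyring(&RaftBackend{})) // TLS keyring already present
	fmt.Println(checkExistingKeyring(&FileBackend{})) // <nil>
}
```

The guard keeps the existing error for the Raft-storage case while letting the HA-only path fall through to `raft.GenerateTLSKey`, which is exactly the path the new test exercises through `sys/storage/raft/bootstrap`.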