mirror of
https://github.com/hashicorp/vault.git
synced 2026-02-18 18:38:08 -05:00
Hopefully fix flaky Consul fencing test (#23280)
* Hopefully fix flaky fencing test when run in Enterprise * Fix typo
This commit is contained in:
parent
98e9d0c95e
commit
9fc67b6656
1 changed files with 52 additions and 0 deletions
|
|
@ -68,6 +68,18 @@ func TestConsulFencing_PartitionedLeaderCantWrite(t *testing.T) {
|
|||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// We need to wait for the KV mount to be ready on all servers - it runs an
|
||||
// "Upgrade" process on mount and will error until that's done. In practice
|
||||
// that's so fast that in CE I've never seen it fail yet, but in Enterprise
|
||||
// there is the added complexity of the upgrade running on the primary while
|
||||
// standby nodes wait for it to complete. So in Enterprise the first PATCH
|
||||
// sent to a standby often comes in before the standby has "seen" that the
|
||||
// primary has completed the upgrade and fails causing the whole test to
|
||||
// error. We can prevent that by taking a short loop here to wait for a read
|
||||
// to succeed on the standby/non-leader (since KV-v2 does upgrade check on
|
||||
// read too).
|
||||
waitForKVv2Upgrade(t, ctx, notLeader.APIClient(), "/test")
|
||||
|
||||
// Start two background workers that will cause writes to Consul in the
|
||||
// background. KV v2 relies on a single active node for correctness.
|
||||
// Specifically its patch operation does a read-modify-write under a
|
||||
|
|
@ -263,6 +275,46 @@ func TestConsulFencing_PartitionedLeaderCantWrite(t *testing.T) {
|
|||
require.Equal(t, expect1, sets[1], "Client 1 writes lost")
|
||||
}
|
||||
|
||||
func waitForKVv2Upgrade(t *testing.T, ctx context.Context, client *api.Client, path string) {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
kv := client.KVv2(path)
|
||||
attempts := 0
|
||||
wait := 20 * time.Millisecond
|
||||
for {
|
||||
// Attempt to perform a write on the KVv2 mount. It will fail until the
|
||||
// backend is done upgrading. If this is a performance standby in Ent then
|
||||
// it will not complete until the primary is done upgrading AND the standby
|
||||
// has noticed that!
|
||||
_, err := kv.Put(ctx, "test-upgrade-done", map[string]interface{}{
|
||||
"ok": 1,
|
||||
})
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
t.Logf("waitForKVv2Upgrade: write failed: %s", err)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
t.Fatalf("context cancelled waiting for KVv2 (%s) upgrade to complete: %s",
|
||||
path, ctx.Err())
|
||||
return
|
||||
case <-time.After(wait):
|
||||
}
|
||||
attempts++
|
||||
// We don't quite want exponential backoff because it really should be fast,
|
||||
// but just reduce log spam on failures if it's taking a while.
|
||||
if attempts > 4 {
|
||||
wait = 250 * time.Millisecond
|
||||
}
|
||||
if attempts > 10 {
|
||||
wait = time.Second
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func makeSet(n int) []int {
|
||||
a := make([]int, n)
|
||||
for i := 0; i < n; i++ {
|
||||
|
|
|
|||
Loading…
Reference in a new issue