Fix cluster reshard slot allocation rounding bug

This commit is contained in:
Ayush 2026-05-27 20:28:06 +00:00
parent 138263a1b4
commit d2b8f56e9d
5 changed files with 79 additions and 4 deletions

5
src/nodes-7000.conf Normal file
View file

@@ -0,0 +1,5 @@
d406bdce5c68173ec2d6354e2e08225725cafb40 127.0.0.1:7001@17001,,tls-port=0,shard-id=e85634c2cacf2d1b34734ca3c5bdbf82946c7f73 master - 0 1779913295990 2 connected 4100-8191
f2ace4e3291a56b7bbfd0c7598ad860d598eee9a 127.0.0.1:7000@17000,,tls-port=0,shard-id=ee4de652b17293887aa8f21f8c9a0ea65158dbe0 myself,master - 0 0 5 connected 0-4099 8192-8194 12288-12291
7ddaf6ad99810838e0c747a7d3c68634c46ba867 127.0.0.1:7003@17003,,tls-port=0,shard-id=d7e75c4b485c6d753fab316d2a125d009aa03c48 master - 0 1779913295723 4 connected 12292-16383
6749ff515e4c4106b14eda6fdf10638ee15e4457 127.0.0.1:7002@17002,,tls-port=0,shard-id=79241d2a4cafe0b7f42384ad90e808bc5e58b612 master - 0 1779913295716 3 connected 8195-12287
vars currentEpoch 5 lastVoteEpoch 0

5
src/nodes-7001.conf Normal file
View file

@@ -0,0 +1,5 @@
6749ff515e4c4106b14eda6fdf10638ee15e4457 127.0.0.1:7002@17002,,tls-port=0,shard-id=79241d2a4cafe0b7f42384ad90e808bc5e58b612 master - 0 1779913296160 3 connected 8195-12287
7ddaf6ad99810838e0c747a7d3c68634c46ba867 127.0.0.1:7003@17003,,tls-port=0,shard-id=d7e75c4b485c6d753fab316d2a125d009aa03c48 master - 0 1779913296059 4 connected 12292-16383
f2ace4e3291a56b7bbfd0c7598ad860d598eee9a 127.0.0.1:7000@17000,,tls-port=0,shard-id=ee4de652b17293887aa8f21f8c9a0ea65158dbe0 master - 0 1779913296000 5 connected 0-4099 8192-8194 12288-12291
d406bdce5c68173ec2d6354e2e08225725cafb40 127.0.0.1:7001@17001,,tls-port=0,shard-id=e85634c2cacf2d1b34734ca3c5bdbf82946c7f73 myself,master - 0 0 2 connected 4100-8191
vars currentEpoch 5 lastVoteEpoch 0

5
src/nodes-7002.conf Normal file
View file

@@ -0,0 +1,5 @@
7ddaf6ad99810838e0c747a7d3c68634c46ba867 127.0.0.1:7003@17003,,tls-port=0,shard-id=d7e75c4b485c6d753fab316d2a125d009aa03c48 master - 0 1779913296365 4 connected 12292-16383
6749ff515e4c4106b14eda6fdf10638ee15e4457 127.0.0.1:7002@17002,,tls-port=0,shard-id=79241d2a4cafe0b7f42384ad90e808bc5e58b612 myself,master - 0 0 3 connected 8195-12287
d406bdce5c68173ec2d6354e2e08225725cafb40 127.0.0.1:7001@17001,,tls-port=0,shard-id=e85634c2cacf2d1b34734ca3c5bdbf82946c7f73 master - 0 1779913296391 2 connected 4100-8191
f2ace4e3291a56b7bbfd0c7598ad860d598eee9a 127.0.0.1:7000@17000,,tls-port=0,shard-id=ee4de652b17293887aa8f21f8c9a0ea65158dbe0 master - 0 1779913296281 5 connected 0-4099 8192-8194 12288-12291
vars currentEpoch 5 lastVoteEpoch 0

5
src/nodes-7003.conf Normal file
View file

@@ -0,0 +1,5 @@
d406bdce5c68173ec2d6354e2e08225725cafb40 127.0.0.1:7001@17001,,tls-port=0,shard-id=e85634c2cacf2d1b34734ca3c5bdbf82946c7f73 master - 0 1779913296453 2 connected 4100-8191
7ddaf6ad99810838e0c747a7d3c68634c46ba867 127.0.0.1:7003@17003,,tls-port=0,shard-id=d7e75c4b485c6d753fab316d2a125d009aa03c48 myself,master - 0 0 4 connected 12292-16383
6749ff515e4c4106b14eda6fdf10638ee15e4457 127.0.0.1:7002@17002,,tls-port=0,shard-id=79241d2a4cafe0b7f42384ad90e808bc5e58b612 master - 0 1779913296555 3 connected 8195-12287
f2ace4e3291a56b7bbfd0c7598ad860d598eee9a 127.0.0.1:7000@17000,,tls-port=0,shard-id=ee4de652b17293887aa8f21f8c9a0ea65158dbe0 master - 0 1779913296372 5 connected 0-4099 8192-8194 12288-12291
vars currentEpoch 5 lastVoteEpoch 0

View file

@@ -6891,8 +6891,11 @@ static list *clusterManagerComputeReshardTable(list *sources, int numslots) {
list *moved = listCreate();
int src_count = listLength(sources), i = 0, tot_slots = 0, j;
clusterManagerNode **sorted = zmalloc(src_count * sizeof(*sorted));
int *max_slots = zmalloc(src_count * sizeof(*max_slots));
float *remainders = zmalloc(src_count * sizeof(*remainders));
listIter li;
listNode *ln;
listRewind(sources, &li);
while ((ln = listNext(&li)) != NULL) {
clusterManagerNode *node = ln->value;
@@ -6901,12 +6904,61 @@ static list *clusterManagerComputeReshardTable(list *sources, int numslots) {
}
qsort(sorted, src_count, sizeof(clusterManagerNode *),
clusterManagerSlotCountCompareDesc);
int assigned_total = 0;
for (i = 0; i < src_count; i++) {
clusterManagerNode *node = sorted[i];
float n = ((float) numslots / tot_slots * node->slots_count);
if (i == 0) n = ceil(n);
else n = floor(n);
int max = (int) n, count = 0;
float exact = ((float) numslots / tot_slots * node->slots_count);
int floor_slots = floor(exact);
max_slots[i] = floor_slots;
remainders[i] = exact - floor_slots;
assigned_total += floor_slots;
}
int remaining = numslots - assigned_total;
if (remaining > 0) {
typedef struct {
int idx;
float rem;
int slots_count;
} orderItem;
orderItem *order = zmalloc(src_count * sizeof(*order));
int k, best;
for (i = 0; i < src_count; i++) {
order[i].idx = i;
order[i].rem = remainders[i];
order[i].slots_count = sorted[i]->slots_count;
}
for (k = 0; k < src_count; k++) {
best = k;
for (j = k + 1; j < src_count; j++) {
if (order[j].rem > order[best].rem ||
(order[j].rem == order[best].rem &&
order[j].slots_count > order[best].slots_count) ||
(order[j].rem == order[best].rem &&
order[j].slots_count == order[best].slots_count &&
order[j].idx < order[best].idx)) {
best = j;
}
}
if (best != k) {
orderItem tmp = order[k];
order[k] = order[best];
order[best] = tmp;
}
}
for (i = 0; i < remaining; i++) {
max_slots[order[i].idx]++;
}
zfree(order);
}
for (i = 0; i < src_count; i++) {
clusterManagerNode *node = sorted[i];
int max = max_slots[i], count = 0;
for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) {
int slot = node->slots[j];
if (!slot) continue;
@@ -6918,6 +6970,9 @@ static list *clusterManagerComputeReshardTable(list *sources, int numslots) {
count++;
}
}
zfree(max_slots);
zfree(remainders);
zfree(sorted);
return moved;
}