mattermost/server/channels/app/platform/web_hub_test.go

956 lines
26 KiB
Go
Raw Permalink Normal View History

// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.
package platform
import (
"bytes"
"encoding/json"
"fmt"
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
"iter"
"net"
"net/http"
"net/http/httptest"
"runtime"
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
"slices"
"testing"
"time"
"github.com/gorilla/websocket"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/mattermost/mattermost/server/public/model"
"github.com/mattermost/mattermost/server/public/shared/i18n"
platform_mocks "github.com/mattermost/mattermost/server/v8/channels/app/platform/mocks"
"github.com/mattermost/mattermost/server/v8/channels/testlib"
)
// dummyWebsocketHandler returns an HTTP handler that upgrades each request to
// a websocket connection and then simply drains incoming messages until the
// peer disconnects. Any failure other than a normal close is reported on t.
func dummyWebsocketHandler(t *testing.T) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		up := &websocket.Upgrader{
			ReadBufferSize:  1024,
			WriteBufferSize: 1024,
		}
		conn, err := up.Upgrade(w, r, nil)
		// Keep reading until the connection yields an error; a nil err from
		// Upgrade means conn is valid, and the loop exits on the first failure.
		for err == nil {
			_, _, err = conn.ReadMessage()
		}
		// A *websocket.CloseError is the expected way for the read loop to
		// terminate; anything else is a genuine test failure.
		if _, isClose := err.(*websocket.CloseError); !isClose {
			require.NoError(t, err)
		}
	}
}
func registerDummyWebConn(t *testing.T, th *TestHelper, addr net.Addr, session *model.Session) *WebConn {
d := websocket.Dialer{}
c, _, err := d.Dial("ws://"+addr.String()+"/ws", nil)
require.NoError(t, err)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
cfg := &WebConnConfig{
WebSocket: c,
Session: *session,
TFunc: i18n.IdentityTfunc(),
Locale: "en",
}
wc := th.Service.NewWebConn(cfg, th.Suite, &hookRunner{})
require.NoError(t, th.Service.HubRegister(wc))
go wc.Pump()
return wc
}
func TestHubStopWithMultipleConnections(t *testing.T) {
mainHelper.Parallel(t)
th := Setup(t).InitBasic(t)
s := httptest.NewServer(dummyWebsocketHandler(t))
defer s.Close()
session, err := th.Service.CreateSession(th.Context, &model.Session{
UserId: th.BasicUser.Id,
})
require.NoError(t, err)
wc1 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
wc2 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
wc3 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
2018-08-07 13:52:51 -04:00
defer wc1.Close()
defer wc2.Close()
defer wc3.Close()
}
// TestHubStopRaceCondition verifies that attempts to use the hub after it has shutdown does not
// block the caller indefinitely.
func TestHubStopRaceCondition(t *testing.T) {
mainHelper.Parallel(t)
th := Setup(t).InitBasic(t)
s := httptest.NewServer(dummyWebsocketHandler(t))
session, err := th.Service.CreateSession(th.Context, &model.Session{
UserId: th.BasicUser.Id,
})
require.NoError(t, err)
wc1 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
defer wc1.Close()
hub := th.Service.hubs[0]
th.Shutdown(t)
done := make(chan bool)
go func() {
wc4 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
wc5 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
require.NoError(t, hub.Register(wc4))
require.NoError(t, hub.Register(wc5))
hub.UpdateActivity("userId", "sessionToken", 0)
MM-23805: Refactor web_hub (#14277) * MM-23800: remove goroutineID and stack printing Each hub has a goroutineID which is calculated with a known hack. The FAQ clearly explains why goroutines don't have an id: https://golang.org/doc/faq#no_goroutine_id. We only added that because sometimes the hub would be deadlocked and having the goroutineID would be useful when getting the stack trace. This is also problematic in stress tests because the hubs would frequently get overloaded and the logs would unnecessarily have stack traces. But that was in the past, and we have done extensive testing with load tests and fuzz testing to smooth any rough edges remaining. Including adding additional metrics for hub buffer size. Monitoring the metrics is a better way to approach this problem. Therefore, we remove these kludges from the code. * Also remove deadlock checking code There is no need for that anymore since we are getting rid of the stack printing anyways. Let's do a wholesale refactor and clean up the codebase. * MM-23805: Refactor web_hub This is a beginning of the refactoring of the websocket code. To start off with, we unexport some methods and constants which did not need to be exported. There are more remaining but some are out of scope for this PR. The main chunk of refactor is to unexport the webconn send channel which was the main cause of panics. Since we were directly sending to the connection from various parts of the codebase, it would be possible that the send channel would be closed and we could still send a message. This would crash the server. To fix this, we refactor the code to centralize all sending from the main hub goroutine. This means we can leverage the connections map to check if the connection exists or not, and only then send the message. We also move the cluster calls to cluster.go. 
* bring back cluster code inside hub * Incorporate review comments * Address review comments * rename index * MM-23807: Refactor web_conn - Unexport some struct fields and constants which are not necessary to be accessed from outside the package. This will help us moving the entire websocket handling code to a separate package later. - Change some empty string checks to check for empty string rather than doing a len check which is more idiomatic. Both of them compile to the same code. So it doesn't make a difference performance-wise. - Remove redundant ToJson calls to get the length. - Incorporate review comments - Unexport some more methods * Fix field name * Run make app-layers * Add note on hub check
2020-04-23 03:46:18 -04:00
for i := 0; i <= broadcastQueueSize; i++ {
hub.Broadcast(model.NewWebSocketEvent("", "", "", "", nil, ""))
}
hub.InvalidateUser("userId")
hub.Unregister(wc4)
hub.Unregister(wc5)
close(done)
}()
select {
case <-done:
case <-time.After(15 * time.Second):
require.FailNow(t, "hub call did not return within 15 seconds after stop")
}
}
MM-27648: Fix a hub deadlock while revoking session (#15293) * MM-27648: Fix a hub deadlock while revoking session This is a bug which has always been there in the codebase. And it can only occur in the extreme of edge-cases. Following is the call trace due to which this happens: ``` 0 0x0000000001dfea68 in github.com/mattermost/mattermost-server/v5/app.(*Hub).InvalidateUser // deadlock at ./app/web_hub.go:369 1 0x0000000001dfc0bd in github.com/mattermost/mattermost-server/v5/app.(*App).InvalidateWebConnSessionCacheForUser at ./app/web_hub.go:109 2 0x0000000001db1be5 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUserSkipClusterSend at ./app/session.go:209 3 0x0000000001db1763 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUser at ./app/session.go:170 4 0x0000000001db2d2f in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSession at ./app/session.go:275 5 0x0000000001db2c09 in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSessionById at ./app/session.go:260 6 0x0000000001daf442 in github.com/mattermost/mattermost-server/v5/app.(*App).GetSession at ./app/session.go:93 7 0x0000000001df93f4 in github.com/mattermost/mattermost-server/v5/app.(*WebConn).IsAuthenticated at ./app/web_conn.go:271 8 0x0000000001dfa29b in github.com/mattermost/mattermost-server/v5/app.(*WebConn).shouldSendEvent at ./app/web_conn.go:323 9 0x0000000001e2667e in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1.3 // starting from hub at ./app/web_hub.go:491 10 0x0000000001e27c01 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1 at ./app/web_hub.go:504 11 0x0000000001e27ee2 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func2 at ./app/web_hub.go:528 12 0x0000000000473811 in runtime.goexit at /usr/local/go/src/runtime/asm_amd64.s:1373 ``` The stars have to align in such a way that the session idle timeout _has_ to happen _exactly_ when a broadcast is happening 
for that user. Only then, this code path gets triggered. Since this is an extreme rabbit hole of calls, I have not attempted any big refactors and went with the most sensible approach which is to make the RevokeSessionById call asynchronous. There are 2 main reasons: - It was already treated as an asynchronous call because it happened during an error condition and we were not checking for the return value anyways. - Session idle timeout is a relatively infrequent event, so creating unbounded goroutines is not a concern. As a bonus, we also get to check the error return and log it. https://mattermost.atlassian.net/browse/MM-27648 * Add a test case * Fix an incorrect comment
2020-08-19 13:57:48 -04:00
func TestHubSessionRevokeRace(t *testing.T) {
mainHelper.Parallel(t)
th := Setup(t)
MM-27648: Fix a hub deadlock while revoking session (#15293) * MM-27648: Fix a hub deadlock while revoking session This is a bug which has always been there in the codebase. And it can only occur in the extreme of edge-cases. Following is the call trace due to which this happens: ``` 0 0x0000000001dfea68 in github.com/mattermost/mattermost-server/v5/app.(*Hub).InvalidateUser // deadlock at ./app/web_hub.go:369 1 0x0000000001dfc0bd in github.com/mattermost/mattermost-server/v5/app.(*App).InvalidateWebConnSessionCacheForUser at ./app/web_hub.go:109 2 0x0000000001db1be5 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUserSkipClusterSend at ./app/session.go:209 3 0x0000000001db1763 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUser at ./app/session.go:170 4 0x0000000001db2d2f in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSession at ./app/session.go:275 5 0x0000000001db2c09 in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSessionById at ./app/session.go:260 6 0x0000000001daf442 in github.com/mattermost/mattermost-server/v5/app.(*App).GetSession at ./app/session.go:93 7 0x0000000001df93f4 in github.com/mattermost/mattermost-server/v5/app.(*WebConn).IsAuthenticated at ./app/web_conn.go:271 8 0x0000000001dfa29b in github.com/mattermost/mattermost-server/v5/app.(*WebConn).shouldSendEvent at ./app/web_conn.go:323 9 0x0000000001e2667e in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1.3 // starting from hub at ./app/web_hub.go:491 10 0x0000000001e27c01 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1 at ./app/web_hub.go:504 11 0x0000000001e27ee2 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func2 at ./app/web_hub.go:528 12 0x0000000000473811 in runtime.goexit at /usr/local/go/src/runtime/asm_amd64.s:1373 ``` The stars have to align in such a way that the session idle timeout _has_ to happen _exactly_ when a broadcast is happening 
for that user. Only then, this code path gets triggered. Since this is an extreme rabbit hole of calls, I have not attempted any big refactors and went with the most sensible approach which is to make the RevokeSessionById call asynchronous. There are 2 main reasons: - It was already treated as an asynchronous call because it happened during an error condition and we were not checking for the return value anyways. - Session idle timeout is a relatively infrequent event, so creating unbounded goroutines is not a concern. As a bonus, we also get to check the error return and log it. https://mattermost.atlassian.net/browse/MM-27648 * Add a test case * Fix an incorrect comment
2020-08-19 13:57:48 -04:00
// This needs to be false for the condition to trigger
th.Service.UpdateConfig(func(cfg *model.Config) {
MM-27648: Fix a hub deadlock while revoking session (#15293) * MM-27648: Fix a hub deadlock while revoking session This is a bug which has always been there in the codebase. And it can only occur in the extreme of edge-cases. Following is the call trace due to which this happens: ``` 0 0x0000000001dfea68 in github.com/mattermost/mattermost-server/v5/app.(*Hub).InvalidateUser // deadlock at ./app/web_hub.go:369 1 0x0000000001dfc0bd in github.com/mattermost/mattermost-server/v5/app.(*App).InvalidateWebConnSessionCacheForUser at ./app/web_hub.go:109 2 0x0000000001db1be5 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUserSkipClusterSend at ./app/session.go:209 3 0x0000000001db1763 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUser at ./app/session.go:170 4 0x0000000001db2d2f in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSession at ./app/session.go:275 5 0x0000000001db2c09 in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSessionById at ./app/session.go:260 6 0x0000000001daf442 in github.com/mattermost/mattermost-server/v5/app.(*App).GetSession at ./app/session.go:93 7 0x0000000001df93f4 in github.com/mattermost/mattermost-server/v5/app.(*WebConn).IsAuthenticated at ./app/web_conn.go:271 8 0x0000000001dfa29b in github.com/mattermost/mattermost-server/v5/app.(*WebConn).shouldSendEvent at ./app/web_conn.go:323 9 0x0000000001e2667e in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1.3 // starting from hub at ./app/web_hub.go:491 10 0x0000000001e27c01 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1 at ./app/web_hub.go:504 11 0x0000000001e27ee2 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func2 at ./app/web_hub.go:528 12 0x0000000000473811 in runtime.goexit at /usr/local/go/src/runtime/asm_amd64.s:1373 ``` The stars have to align in such a way that the session idle timeout _has_ to happen _exactly_ when a broadcast is happening 
for that user. Only then, this code path gets triggered. Since this is an extreme rabbit hole of calls, I have not attempted any big refactors and went with the most sensible approach which is to make the RevokeSessionById call asynchronous. There are 2 main reasons: - It was already treated as an asynchronous call because it happened during an error condition and we were not checking for the return value anyways. - Session idle timeout is a relatively infrequent event, so creating unbounded goroutines is not a concern. As a bonus, we also get to check the error return and log it. https://mattermost.atlassian.net/browse/MM-27648 * Add a test case * Fix an incorrect comment
2020-08-19 13:57:48 -04:00
*cfg.ServiceSettings.ExtendSessionLengthWithActivity = false
})
s := httptest.NewServer(dummyWebsocketHandler(t))
defer s.Close()
session, err := th.Service.CreateSession(th.Context, &model.Session{
UserId: model.NewId(),
})
require.NoError(t, err)
wc1 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
hub := th.Service.GetHubForUserId(wc1.UserId)
MM-27648: Fix a hub deadlock while revoking session (#15293) * MM-27648: Fix a hub deadlock while revoking session This is a bug which has always been there in the codebase. And it can only occur in the extreme of edge-cases. Following is the call trace due to which this happens: ``` 0 0x0000000001dfea68 in github.com/mattermost/mattermost-server/v5/app.(*Hub).InvalidateUser // deadlock at ./app/web_hub.go:369 1 0x0000000001dfc0bd in github.com/mattermost/mattermost-server/v5/app.(*App).InvalidateWebConnSessionCacheForUser at ./app/web_hub.go:109 2 0x0000000001db1be5 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUserSkipClusterSend at ./app/session.go:209 3 0x0000000001db1763 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUser at ./app/session.go:170 4 0x0000000001db2d2f in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSession at ./app/session.go:275 5 0x0000000001db2c09 in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSessionById at ./app/session.go:260 6 0x0000000001daf442 in github.com/mattermost/mattermost-server/v5/app.(*App).GetSession at ./app/session.go:93 7 0x0000000001df93f4 in github.com/mattermost/mattermost-server/v5/app.(*WebConn).IsAuthenticated at ./app/web_conn.go:271 8 0x0000000001dfa29b in github.com/mattermost/mattermost-server/v5/app.(*WebConn).shouldSendEvent at ./app/web_conn.go:323 9 0x0000000001e2667e in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1.3 // starting from hub at ./app/web_hub.go:491 10 0x0000000001e27c01 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1 at ./app/web_hub.go:504 11 0x0000000001e27ee2 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func2 at ./app/web_hub.go:528 12 0x0000000000473811 in runtime.goexit at /usr/local/go/src/runtime/asm_amd64.s:1373 ``` The stars have to align in such a way that the session idle timeout _has_ to happen _exactly_ when a broadcast is happening 
for that user. Only then, this code path gets triggered. Since this is an extreme rabbit hole of calls, I have not attempted any big refactors and went with the most sensible approach which is to make the RevokeSessionById call asynchronous. There are 2 main reasons: - It was already treated as an asynchronous call because it happened during an error condition and we were not checking for the return value anyways. - Session idle timeout is a relatively infrequent event, so creating unbounded goroutines is not a concern. As a bonus, we also get to check the error return and log it. https://mattermost.atlassian.net/browse/MM-27648 * Add a test case * Fix an incorrect comment
2020-08-19 13:57:48 -04:00
done := make(chan bool)
time.Sleep(2 * time.Second)
MM-27648: Fix a hub deadlock while revoking session (#15293) * MM-27648: Fix a hub deadlock while revoking session This is a bug which has always been there in the codebase. And it can only occur in the extreme of edge-cases. Following is the call trace due to which this happens: ``` 0 0x0000000001dfea68 in github.com/mattermost/mattermost-server/v5/app.(*Hub).InvalidateUser // deadlock at ./app/web_hub.go:369 1 0x0000000001dfc0bd in github.com/mattermost/mattermost-server/v5/app.(*App).InvalidateWebConnSessionCacheForUser at ./app/web_hub.go:109 2 0x0000000001db1be5 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUserSkipClusterSend at ./app/session.go:209 3 0x0000000001db1763 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUser at ./app/session.go:170 4 0x0000000001db2d2f in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSession at ./app/session.go:275 5 0x0000000001db2c09 in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSessionById at ./app/session.go:260 6 0x0000000001daf442 in github.com/mattermost/mattermost-server/v5/app.(*App).GetSession at ./app/session.go:93 7 0x0000000001df93f4 in github.com/mattermost/mattermost-server/v5/app.(*WebConn).IsAuthenticated at ./app/web_conn.go:271 8 0x0000000001dfa29b in github.com/mattermost/mattermost-server/v5/app.(*WebConn).shouldSendEvent at ./app/web_conn.go:323 9 0x0000000001e2667e in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1.3 // starting from hub at ./app/web_hub.go:491 10 0x0000000001e27c01 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1 at ./app/web_hub.go:504 11 0x0000000001e27ee2 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func2 at ./app/web_hub.go:528 12 0x0000000000473811 in runtime.goexit at /usr/local/go/src/runtime/asm_amd64.s:1373 ``` The stars have to align in such a way that the session idle timeout _has_ to happen _exactly_ when a broadcast is happening 
for that user. Only then, this code path gets triggered. Since this is an extreme rabbit hole of calls, I have not attempted any big refactors and went with the most sensible approach which is to make the RevokeSessionById call asynchronous. There are 2 main reasons: - It was already treated as an asynchronous call because it happened during an error condition and we were not checking for the return value anyways. - Session idle timeout is a relatively infrequent event, so creating unbounded goroutines is not a concern. As a bonus, we also get to check the error return and log it. https://mattermost.atlassian.net/browse/MM-27648 * Add a test case * Fix an incorrect comment
2020-08-19 13:57:48 -04:00
// We override the LastActivityAt which happens in NewWebConn.
// This is needed to call RevokeSessionById which triggers the race.
2025-05-05 12:36:25 -04:00
err = th.Service.AddSessionToCache(session)
require.NoError(t, err)
MM-27648: Fix a hub deadlock while revoking session (#15293) * MM-27648: Fix a hub deadlock while revoking session This is a bug which has always been there in the codebase. And it can only occur in the extreme of edge-cases. Following is the call trace due to which this happens: ``` 0 0x0000000001dfea68 in github.com/mattermost/mattermost-server/v5/app.(*Hub).InvalidateUser // deadlock at ./app/web_hub.go:369 1 0x0000000001dfc0bd in github.com/mattermost/mattermost-server/v5/app.(*App).InvalidateWebConnSessionCacheForUser at ./app/web_hub.go:109 2 0x0000000001db1be5 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUserSkipClusterSend at ./app/session.go:209 3 0x0000000001db1763 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUser at ./app/session.go:170 4 0x0000000001db2d2f in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSession at ./app/session.go:275 5 0x0000000001db2c09 in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSessionById at ./app/session.go:260 6 0x0000000001daf442 in github.com/mattermost/mattermost-server/v5/app.(*App).GetSession at ./app/session.go:93 7 0x0000000001df93f4 in github.com/mattermost/mattermost-server/v5/app.(*WebConn).IsAuthenticated at ./app/web_conn.go:271 8 0x0000000001dfa29b in github.com/mattermost/mattermost-server/v5/app.(*WebConn).shouldSendEvent at ./app/web_conn.go:323 9 0x0000000001e2667e in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1.3 // starting from hub at ./app/web_hub.go:491 10 0x0000000001e27c01 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1 at ./app/web_hub.go:504 11 0x0000000001e27ee2 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func2 at ./app/web_hub.go:528 12 0x0000000000473811 in runtime.goexit at /usr/local/go/src/runtime/asm_amd64.s:1373 ``` The stars have to align in such a way that the session idle timeout _has_ to happen _exactly_ when a broadcast is happening 
for that user. Only then, this code path gets triggered. Since this is an extreme rabbit hole of calls, I have not attempted any big refactors and went with the most sensible approach which is to make the RevokeSessionById call asynchronous. There are 2 main reasons: - It was already treated as an asynchronous call because it happened during an error condition and we were not checking for the return value anyways. - Session idle timeout is a relatively infrequent event, so creating unbounded goroutines is not a concern. As a bonus, we also get to check the error return and log it. https://mattermost.atlassian.net/browse/MM-27648 * Add a test case * Fix an incorrect comment
2020-08-19 13:57:48 -04:00
go func() {
for i := 0; i <= broadcastQueueSize; i++ {
hub.Broadcast(model.NewWebSocketEvent("", "teamID", "", "", nil, ""))
MM-27648: Fix a hub deadlock while revoking session (#15293) * MM-27648: Fix a hub deadlock while revoking session This is a bug which has always been there in the codebase. And it can only occur in the extreme of edge-cases. Following is the call trace due to which this happens: ``` 0 0x0000000001dfea68 in github.com/mattermost/mattermost-server/v5/app.(*Hub).InvalidateUser // deadlock at ./app/web_hub.go:369 1 0x0000000001dfc0bd in github.com/mattermost/mattermost-server/v5/app.(*App).InvalidateWebConnSessionCacheForUser at ./app/web_hub.go:109 2 0x0000000001db1be5 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUserSkipClusterSend at ./app/session.go:209 3 0x0000000001db1763 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUser at ./app/session.go:170 4 0x0000000001db2d2f in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSession at ./app/session.go:275 5 0x0000000001db2c09 in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSessionById at ./app/session.go:260 6 0x0000000001daf442 in github.com/mattermost/mattermost-server/v5/app.(*App).GetSession at ./app/session.go:93 7 0x0000000001df93f4 in github.com/mattermost/mattermost-server/v5/app.(*WebConn).IsAuthenticated at ./app/web_conn.go:271 8 0x0000000001dfa29b in github.com/mattermost/mattermost-server/v5/app.(*WebConn).shouldSendEvent at ./app/web_conn.go:323 9 0x0000000001e2667e in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1.3 // starting from hub at ./app/web_hub.go:491 10 0x0000000001e27c01 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1 at ./app/web_hub.go:504 11 0x0000000001e27ee2 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func2 at ./app/web_hub.go:528 12 0x0000000000473811 in runtime.goexit at /usr/local/go/src/runtime/asm_amd64.s:1373 ``` The stars have to align in such a way that the session idle timeout _has_ to happen _exactly_ when a broadcast is happening 
for that user. Only then, this code path gets triggered. Since this is an extreme rabbit hole of calls, I have not attempted any big refactors and went with the most sensible approach which is to make the RevokeSessionById call asynchronous. There are 2 main reasons: - It was already treated as an asynchronous call because it happened during an error condition and we were not checking for the return value anyways. - Session idle timeout is a relatively infrequent event, so creating unbounded goroutines is not a concern. As a bonus, we also get to check the error return and log it. https://mattermost.atlassian.net/browse/MM-27648 * Add a test case * Fix an incorrect comment
2020-08-19 13:57:48 -04:00
}
close(done)
}()
// This call should happen _after_ !wc.IsAuthenticated() and _before_wc.isMemberOfTeam().
// There's no guarantee this will happen. But that's our best bet to trigger this race.
MM-27648: Fix a hub deadlock while revoking session (#15293) * MM-27648: Fix a hub deadlock while revoking session This is a bug which has always been there in the codebase. And it can only occur in the extreme of edge-cases. Following is the call trace due to which this happens: ``` 0 0x0000000001dfea68 in github.com/mattermost/mattermost-server/v5/app.(*Hub).InvalidateUser // deadlock at ./app/web_hub.go:369 1 0x0000000001dfc0bd in github.com/mattermost/mattermost-server/v5/app.(*App).InvalidateWebConnSessionCacheForUser at ./app/web_hub.go:109 2 0x0000000001db1be5 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUserSkipClusterSend at ./app/session.go:209 3 0x0000000001db1763 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUser at ./app/session.go:170 4 0x0000000001db2d2f in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSession at ./app/session.go:275 5 0x0000000001db2c09 in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSessionById at ./app/session.go:260 6 0x0000000001daf442 in github.com/mattermost/mattermost-server/v5/app.(*App).GetSession at ./app/session.go:93 7 0x0000000001df93f4 in github.com/mattermost/mattermost-server/v5/app.(*WebConn).IsAuthenticated at ./app/web_conn.go:271 8 0x0000000001dfa29b in github.com/mattermost/mattermost-server/v5/app.(*WebConn).shouldSendEvent at ./app/web_conn.go:323 9 0x0000000001e2667e in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1.3 // starting from hub at ./app/web_hub.go:491 10 0x0000000001e27c01 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1 at ./app/web_hub.go:504 11 0x0000000001e27ee2 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func2 at ./app/web_hub.go:528 12 0x0000000000473811 in runtime.goexit at /usr/local/go/src/runtime/asm_amd64.s:1373 ``` The stars have to align in such a way that the session idle timeout _has_ to happen _exactly_ when a broadcast is happening 
for that user. Only then, this code path gets triggered. Since this is an extreme rabbit hole of calls, I have not attempted any big refactors and went with the most sensible approach which is to make the RevokeSessionById call asynchronous. There are 2 main reasons: - It was already treated as an asynchronous call because it happened during an error condition and we were not checking for the return value anyways. - Session idle timeout is a relatively infrequent event, so creating unbounded goroutines is not a concern. As a bonus, we also get to check the error return and log it. https://mattermost.atlassian.net/browse/MM-27648 * Add a test case * Fix an incorrect comment
2020-08-19 13:57:48 -04:00
wc1.InvalidateCache()
for range 10 {
MM-27648: Fix a hub deadlock while revoking session (#15293) * MM-27648: Fix a hub deadlock while revoking session This is a bug which has always been there in the codebase. And it can only occur in the extreme of edge-cases. Following is the call trace due to which this happens: ``` 0 0x0000000001dfea68 in github.com/mattermost/mattermost-server/v5/app.(*Hub).InvalidateUser // deadlock at ./app/web_hub.go:369 1 0x0000000001dfc0bd in github.com/mattermost/mattermost-server/v5/app.(*App).InvalidateWebConnSessionCacheForUser at ./app/web_hub.go:109 2 0x0000000001db1be5 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUserSkipClusterSend at ./app/session.go:209 3 0x0000000001db1763 in github.com/mattermost/mattermost-server/v5/app.(*App).ClearSessionCacheForUser at ./app/session.go:170 4 0x0000000001db2d2f in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSession at ./app/session.go:275 5 0x0000000001db2c09 in github.com/mattermost/mattermost-server/v5/app.(*App).RevokeSessionById at ./app/session.go:260 6 0x0000000001daf442 in github.com/mattermost/mattermost-server/v5/app.(*App).GetSession at ./app/session.go:93 7 0x0000000001df93f4 in github.com/mattermost/mattermost-server/v5/app.(*WebConn).IsAuthenticated at ./app/web_conn.go:271 8 0x0000000001dfa29b in github.com/mattermost/mattermost-server/v5/app.(*WebConn).shouldSendEvent at ./app/web_conn.go:323 9 0x0000000001e2667e in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1.3 // starting from hub at ./app/web_hub.go:491 10 0x0000000001e27c01 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func1 at ./app/web_hub.go:504 11 0x0000000001e27ee2 in github.com/mattermost/mattermost-server/v5/app.(*Hub).Start.func2 at ./app/web_hub.go:528 12 0x0000000000473811 in runtime.goexit at /usr/local/go/src/runtime/asm_amd64.s:1373 ``` The stars have to align in such a way that the session idle timeout _has_ to happen _exactly_ when a broadcast is happening 
for that user. Only then, this code path gets triggered. Since this is an extreme rabbit hole of calls, I have not attempted any big refactors and went with the most sensible approach which is to make the RevokeSessionById call asynchronous. There are 2 main reasons: - It was already treated as an asynchronous call because it happened during an error condition and we were not checking for the return value anyways. - Session idle timeout is a relatively infrequent event, so creating unbounded goroutines is not a concern. As a bonus, we also get to check the error return and log it. https://mattermost.atlassian.net/browse/MM-27648 * Add a test case * Fix an incorrect comment
2020-08-19 13:57:48 -04:00
// If broadcast buffer has not emptied,
// we sleep for a second and check again
if len(hub.broadcast) > 0 {
time.Sleep(time.Second)
continue
}
}
if len(hub.broadcast) > 0 {
require.Fail(t, "hub is deadlocked")
}
}
func TestHubConnIndex(t *testing.T) {
mainHelper.Parallel(t)
th := Setup(t).InitBasic(t)
_, err := th.Service.Store.Channel().SaveMember(th.Context, &model.ChannelMember{
ChannelId: th.BasicChannel.Id,
UserId: th.BasicUser.Id,
NotifyProps: model.GetDefaultChannelNotifyProps(),
SchemeGuest: th.BasicUser.IsGuest(),
SchemeUser: !th.BasicUser.IsGuest(),
})
require.NoError(t, err)
_, err = th.Service.Store.Channel().SaveMember(th.Context, &model.ChannelMember{
ChannelId: th.BasicChannel.Id,
UserId: th.BasicUser2.Id,
NotifyProps: model.GetDefaultChannelNotifyProps(),
SchemeGuest: th.BasicUser2.IsGuest(),
SchemeUser: !th.BasicUser2.IsGuest(),
})
require.NoError(t, err)
for _, fastIterate := range []bool{true, false} {
t.Run(fmt.Sprintf("fastIterate=%t", fastIterate), func(t *testing.T) {
t.Run("Basic", func(t *testing.T) {
connIndex := newHubConnectionIndex(1*time.Second, th.Service.Store, th.Service.logger, fastIterate)
// User1
wc1 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: model.NewId(),
}
wc1.SetConnectionID(model.NewId())
wc1.SetSession(&model.Session{})
// User2
wc2 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: model.NewId(),
}
wc2.SetConnectionID(model.NewId())
wc2.SetSession(&model.Session{})
wc3 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: wc2.UserId,
}
wc3.SetConnectionID(model.NewId())
wc3.SetSession(&model.Session{})
wc4 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: wc2.UserId,
}
wc4.SetConnectionID(model.NewId())
wc4.SetSession(&model.Session{})
errAdd := connIndex.Add(wc1)
require.NoError(t, errAdd)
err = connIndex.Add(wc2)
require.NoError(t, err)
err = connIndex.Add(wc3)
require.NoError(t, err)
err = connIndex.Add(wc4)
require.NoError(t, err)
t.Run("Basic", func(t *testing.T) {
assert.True(t, connIndex.Has(wc1))
assert.True(t, connIndex.Has(wc2))
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
assert.ElementsMatch(t, slices.Collect(connIndex.ForUser(wc2.UserId)), []*WebConn{wc2, wc3, wc4})
assert.ElementsMatch(t, slices.Collect(connIndex.ForUser(wc1.UserId)), []*WebConn{wc1})
assert.True(t, connIndex.Has(wc2))
assert.True(t, connIndex.Has(wc1))
assert.Len(t, connIndex.All(), 4)
})
t.Run("RemoveMiddleUser2", func(t *testing.T) {
connIndex.Remove(wc3) // Remove from middle from user2
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
assert.ElementsMatch(t, slices.Collect(connIndex.ForUser(wc2.UserId)), []*WebConn{wc2, wc4})
assert.ElementsMatch(t, slices.Collect(connIndex.ForUser(wc1.UserId)), []*WebConn{wc1})
assert.True(t, connIndex.Has(wc2))
assert.False(t, connIndex.Has(wc3))
assert.True(t, connIndex.Has(wc4))
assert.Len(t, connIndex.All(), 3)
})
t.Run("RemoveUser1", func(t *testing.T) {
connIndex.Remove(wc1) // Remove sole connection from user1
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
assert.ElementsMatch(t, slices.Collect(connIndex.ForUser(wc2.UserId)), []*WebConn{wc2, wc4})
assert.ElementsMatch(t, slices.Collect(connIndex.ForUser(wc1.UserId)), []*WebConn{})
assert.Len(t, slices.Collect(connIndex.ForUser(wc1.UserId)), 0)
assert.Len(t, connIndex.All(), 2)
assert.False(t, connIndex.Has(wc1))
assert.True(t, connIndex.Has(wc2))
})
t.Run("RemoveEndUser2", func(t *testing.T) {
connIndex.Remove(wc4) // Remove from end from user2
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
assert.ElementsMatch(t, slices.Collect(connIndex.ForUser(wc2.UserId)), []*WebConn{wc2})
assert.ElementsMatch(t, slices.Collect(connIndex.ForUser(wc1.UserId)), []*WebConn{})
assert.True(t, connIndex.Has(wc2))
assert.False(t, connIndex.Has(wc3))
assert.False(t, connIndex.Has(wc4))
assert.Len(t, connIndex.All(), 1)
})
})
t.Run("ByConnectionId", func(t *testing.T) {
connIndex := newHubConnectionIndex(1*time.Second, th.Service.Store, th.Service.logger, fastIterate)
// User1
wc1ID := model.NewId()
wc1 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: th.BasicUser.Id,
}
wc1.SetConnectionID(wc1ID)
wc1.SetSession(&model.Session{})
// User2
wc2ID := model.NewId()
wc2 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: th.BasicUser2.Id,
}
wc2.SetConnectionID(wc2ID)
wc2.SetSession(&model.Session{})
wc3ID := model.NewId()
wc3 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: wc2.UserId,
}
wc3.SetConnectionID(wc3ID)
wc3.SetSession(&model.Session{})
t.Run("no connections", func(t *testing.T) {
assert.False(t, connIndex.Has(wc1))
assert.False(t, connIndex.Has(wc2))
assert.False(t, connIndex.Has(wc3))
assert.Empty(t, connIndex.byConnectionId)
})
t.Run("adding", func(t *testing.T) {
err = connIndex.Add(wc1)
require.NoError(t, err)
err = connIndex.Add(wc3)
require.NoError(t, err)
assert.Len(t, connIndex.byConnectionId, 2)
assert.Equal(t, wc1, connIndex.ForConnection(wc1ID))
assert.Equal(t, wc3, connIndex.ForConnection(wc3ID))
assert.Equal(t, (*WebConn)(nil), connIndex.ForConnection(wc2ID))
})
t.Run("removing", func(t *testing.T) {
connIndex.Remove(wc3)
assert.Len(t, connIndex.byConnectionId, 1)
assert.Equal(t, wc1, connIndex.ForConnection(wc1ID))
assert.Equal(t, (*WebConn)(nil), connIndex.ForConnection(wc3ID))
assert.Equal(t, (*WebConn)(nil), connIndex.ForConnection(wc2ID))
})
})
})
}
t.Run("ByChannelId", func(t *testing.T) {
connIndex := newHubConnectionIndex(1*time.Second, th.Service.Store, th.Service.logger, true)
// User1
wc1ID := model.NewId()
wc1 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: th.BasicUser.Id,
}
wc1.SetConnectionID(wc1ID)
wc1.SetSession(&model.Session{})
// User2
wc2ID := model.NewId()
wc2 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: th.BasicUser2.Id,
}
wc2.SetConnectionID(wc2ID)
wc2.SetSession(&model.Session{})
wc3ID := model.NewId()
wc3 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: wc2.UserId,
}
wc3.SetConnectionID(wc3ID)
wc3.SetSession(&model.Session{})
err = connIndex.Add(wc1)
require.NoError(t, err)
err = connIndex.Add(wc2)
require.NoError(t, err)
err = connIndex.Add(wc3)
require.NoError(t, err)
t.Run("ForChannel", func(t *testing.T) {
require.Len(t, connIndex.byChannelID, 1)
ids := make([]string, 0)
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
for c := range connIndex.ForChannel(th.BasicChannel.Id) {
ids = append(ids, c.GetConnectionID())
}
require.ElementsMatch(t, []string{wc1ID, wc2ID, wc3ID}, ids)
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
require.Len(t, slices.Collect(connIndex.ForChannel("notexist")), 0)
})
ch := th.CreateChannel(t, th.BasicTeam)
_, err = th.Service.Store.Channel().SaveMember(th.Context, &model.ChannelMember{
ChannelId: ch.Id,
UserId: th.BasicUser2.Id,
NotifyProps: model.GetDefaultChannelNotifyProps(),
SchemeGuest: th.BasicUser2.IsGuest(),
SchemeUser: !th.BasicUser2.IsGuest(),
})
require.NoError(t, err)
t.Run("InvalidateCMCacheForUser", func(t *testing.T) {
require.NoError(t, connIndex.InvalidateCMCacheForUser(th.BasicUser2.Id))
require.Len(t, connIndex.byChannelID, 2)
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
require.Len(t, slices.Collect(connIndex.ForChannel(th.BasicChannel.Id)), 3)
require.Len(t, slices.Collect(connIndex.ForChannel(ch.Id)), 2)
})
t.Run("Remove", func(t *testing.T) {
connIndex.Remove(wc3)
require.Len(t, connIndex.byChannelID, 2)
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
require.Len(t, slices.Collect(connIndex.ForChannel(th.BasicChannel.Id)), 2)
})
})
}
func TestHubConnIndexIncorrectRemoval(t *testing.T) {
mainHelper.Parallel(t)
th := Setup(t)
connIndex := newHubConnectionIndex(1*time.Second, th.Service.Store, th.Service.logger, false)
// User2
wc2 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: model.NewId(),
}
wc2.SetConnectionID("first")
wc2.SetSession(&model.Session{})
wc3 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: wc2.UserId,
}
wc3.SetConnectionID("myID")
wc3.SetSession(&model.Session{})
wc4 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: wc2.UserId,
}
wc4.SetConnectionID("last")
wc4.SetSession(&model.Session{})
err := connIndex.Add(wc2)
require.NoError(t, err)
err = connIndex.Add(wc3)
require.NoError(t, err)
err = connIndex.Add(wc4)
require.NoError(t, err)
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
for wc := range connIndex.ForUser(wc2.UserId) {
if !connIndex.Has(wc) {
require.Failf(t, "Failed to find connection", "connection: %v", wc)
continue
}
if connIndex.ForConnection("myID") != nil {
connIndex.Remove(wc)
}
}
}
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
func TestHubConnIndexInactive(t *testing.T) {
mainHelper.Parallel(t)
MM-46604: Fix racy access to session props (#20996) The core mistake was that the webconn doesn't really go out of scope once the connection disconnects. It is kept in the webhub connIndex to be reconnected if the user connects again. This was the new behavior as part of reliable websockets. Therefore, it was a mistake to return the session to the pool once the connection drops. Because the connection would still receive events from the web_hub. And once you release the session, another login might acquire the session and set some props, while the web_hub might still try to send events to it, which will cause a read of the map prop. The following test case illustrates such a race. It is very hard to trigger it organically, hence I artificially wrote the code. The right fix is to release the session only when the connection is stale and gets deleted from the conn index. The PR has been load tested in `-race` mode just for extra sanity check. ```go func TestHubSessionRace(t *testing.T) { th := Setup(t).InitBasic() defer th.TearDown() s := httptest.NewServer(dummyWebsocketHandler(t)) defer s.Close() th.Server.HubStart() wc1 := registerDummyWebConn(t, th.App, s.Listener.Addr(), th.BasicUser.Id) defer wc1.Close() var wg sync.WaitGroup wg.Add(2) go func() { defer wg.Done() token := wc1.GetSessionToken() // Return to pool after *WebConn.Pump finishes wc1.App.Srv().userService.ReturnSessionToPool(wc1.GetSession()) // A new HTTP request acquires a session which gets it from the pool sess, _ := wc1.App.GetSession(token) // Login happens which sets some session properties sess.AddProp(model.SessionPropPlatform, "chrome") }() go func() { defer wg.Done() // Called from *WebConn.shouldSendEvent t.Log("session: ", wc1.GetSession().Props[model.SessionPropIsGuest] == "true") }() wg.Wait() } ``` https://mattermost.atlassian.net/browse/MM-46604 ```release-note NONE ```
2022-09-14 04:57:24 -04:00
th := Setup(t)
connIndex := newHubConnectionIndex(2*time.Second, th.Service.Store, th.Service.logger, false)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
// User1
wc1 := &WebConn{
Platform: th.Service,
UserId: model.NewId(),
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
}
wc1.Active.Store(true)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
wc1.SetConnectionID("conn1")
MM-46604: Fix racy access to session props (#20996) The core mistake was that the webconn doesn't really go out of scope once the connection disconnects. It is kept in the webhub connIndex to be reconnected if the user connects again. This was the new behavior as part of reliable websockets. Therefore, it was a mistake to return the session to the pool once the connection drops. Because the connection would still recieve events from the web_hub. And once you release the session, another login might acquire the session and set some props, while the web_hub might still try to send events to it, which will cause a read of the map prop. The following test case illustrates such a race. It is very hard to trigger it organically, hence I artificially wrote the code. The right fix is to release the session only when the connection is stale and gets deleted from the conn index. The PR has been load tested in `-race` mode just for extra sanity check. ```go func TestHubSessionRace(t *testing.T) { th := Setup(t).InitBasic() defer th.TearDown() s := httptest.NewServer(dummyWebsocketHandler(t)) defer s.Close() th.Server.HubStart() wc1 := registerDummyWebConn(t, th.App, s.Listener.Addr(), th.BasicUser.Id) defer wc1.Close() var wg sync.WaitGroup wg.Add(2) go func() { defer wg.Done() token := wc1.GetSessionToken() // Return to pool after *WebConn.Pump finishes wc1.App.Srv().userService.ReturnSessionToPool(wc1.GetSession()) // A new HTTP requests acquires a session which gets it from the pool sess, _ := wc1.App.GetSession(token) // Login happens which sets some session properties sess.AddProp(model.SessionPropPlatform, "chrome") }() go func() { defer wg.Done() // Called from *WebConn.shouldSendEvent t.Log("session: ", wc1.GetSession().Props[model.SessionPropIsGuest] == "true") }() wg.Wait() } ``` https://mattermost.atlassian.net/browse/MM-46604 ```release-note NONE ```
2022-09-14 04:57:24 -04:00
wc1.SetSession(&model.Session{})
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
// User2
wc2 := &WebConn{
Platform: th.Service,
UserId: model.NewId(),
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
}
wc2.Active.Store(true)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
wc2.SetConnectionID("conn2")
MM-46604: Fix racy access to session props (#20996) The core mistake was that the webconn doesn't really go out of scope once the connection disconnects. It is kept in the webhub connIndex to be reconnected if the user connects again. This was the new behavior as part of reliable websockets. Therefore, it was a mistake to return the session to the pool once the connection drops. Because the connection would still recieve events from the web_hub. And once you release the session, another login might acquire the session and set some props, while the web_hub might still try to send events to it, which will cause a read of the map prop. The following test case illustrates such a race. It is very hard to trigger it organically, hence I artificially wrote the code. The right fix is to release the session only when the connection is stale and gets deleted from the conn index. The PR has been load tested in `-race` mode just for extra sanity check. ```go func TestHubSessionRace(t *testing.T) { th := Setup(t).InitBasic() defer th.TearDown() s := httptest.NewServer(dummyWebsocketHandler(t)) defer s.Close() th.Server.HubStart() wc1 := registerDummyWebConn(t, th.App, s.Listener.Addr(), th.BasicUser.Id) defer wc1.Close() var wg sync.WaitGroup wg.Add(2) go func() { defer wg.Done() token := wc1.GetSessionToken() // Return to pool after *WebConn.Pump finishes wc1.App.Srv().userService.ReturnSessionToPool(wc1.GetSession()) // A new HTTP requests acquires a session which gets it from the pool sess, _ := wc1.App.GetSession(token) // Login happens which sets some session properties sess.AddProp(model.SessionPropPlatform, "chrome") }() go func() { defer wg.Done() // Called from *WebConn.shouldSendEvent t.Log("session: ", wc1.GetSession().Props[model.SessionPropIsGuest] == "true") }() wg.Wait() } ``` https://mattermost.atlassian.net/browse/MM-46604 ```release-note NONE ```
2022-09-14 04:57:24 -04:00
wc2.SetSession(&model.Session{})
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
wc3 := &WebConn{
Platform: th.Service,
UserId: wc2.UserId,
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
}
wc3.Active.Store(false)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
wc3.SetConnectionID("conn3")
MM-46604: Fix racy access to session props (#20996) The core mistake was that the webconn doesn't really go out of scope once the connection disconnects. It is kept in the webhub connIndex to be reconnected if the user connects again. This was the new behavior as part of reliable websockets. Therefore, it was a mistake to return the session to the pool once the connection drops. Because the connection would still recieve events from the web_hub. And once you release the session, another login might acquire the session and set some props, while the web_hub might still try to send events to it, which will cause a read of the map prop. The following test case illustrates such a race. It is very hard to trigger it organically, hence I artificially wrote the code. The right fix is to release the session only when the connection is stale and gets deleted from the conn index. The PR has been load tested in `-race` mode just for extra sanity check. ```go func TestHubSessionRace(t *testing.T) { th := Setup(t).InitBasic() defer th.TearDown() s := httptest.NewServer(dummyWebsocketHandler(t)) defer s.Close() th.Server.HubStart() wc1 := registerDummyWebConn(t, th.App, s.Listener.Addr(), th.BasicUser.Id) defer wc1.Close() var wg sync.WaitGroup wg.Add(2) go func() { defer wg.Done() token := wc1.GetSessionToken() // Return to pool after *WebConn.Pump finishes wc1.App.Srv().userService.ReturnSessionToPool(wc1.GetSession()) // A new HTTP requests acquires a session which gets it from the pool sess, _ := wc1.App.GetSession(token) // Login happens which sets some session properties sess.AddProp(model.SessionPropPlatform, "chrome") }() go func() { defer wg.Done() // Called from *WebConn.shouldSendEvent t.Log("session: ", wc1.GetSession().Props[model.SessionPropIsGuest] == "true") }() wg.Wait() } ``` https://mattermost.atlassian.net/browse/MM-46604 ```release-note NONE ```
2022-09-14 04:57:24 -04:00
wc3.SetSession(&model.Session{})
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
err := connIndex.Add(wc1)
require.NoError(t, err)
err = connIndex.Add(wc2)
require.NoError(t, err)
err = connIndex.Add(wc3)
require.NoError(t, err)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
assert.Nil(t, connIndex.RemoveInactiveByConnectionID(wc2.UserId, "conn2"))
assert.Equal(t, connIndex.ForUserActiveCount(wc2.UserId), 1)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
assert.NotNil(t, connIndex.RemoveInactiveByConnectionID(wc2.UserId, "conn3"))
assert.Equal(t, connIndex.ForUserActiveCount(wc2.UserId), 1)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
assert.Nil(t, connIndex.RemoveInactiveByConnectionID(wc1.UserId, "conn3"))
assert.False(t, connIndex.Has(wc3))
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
assert.Len(t, slices.Collect(connIndex.ForUser(wc2.UserId)), 1)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
wc3.lastUserActivityAt = model.GetMillis()
err = connIndex.Add(wc3)
require.NoError(t, err)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
connIndex.RemoveInactiveConnections()
assert.True(t, connIndex.Has(wc3))
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
assert.Len(t, slices.Collect(connIndex.ForUser(wc2.UserId)), 2)
assert.Equal(t, connIndex.ForUserActiveCount(wc2.UserId), 1)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
assert.Len(t, connIndex.All(), 3)
wc3.lastUserActivityAt = model.GetMillis() - (time.Minute).Milliseconds()
connIndex.RemoveInactiveConnections()
assert.False(t, connIndex.Has(wc3))
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
assert.Len(t, slices.Collect(connIndex.ForUser(wc2.UserId)), 1)
assert.Equal(t, connIndex.ForUserActiveCount(wc2.UserId), 1)
MM-32950: Reliable WebSockets: Basic single server (#17406) * MM-32950: Reliable WebSockets: Basic single server This PR adds reliable websocket support for a single server. Below is a brief overview of the three states of a connection: Normal: - All messages are routed via web hub. - Each web conn has a send queue to which it gets pushed. - A message gets pulled from the queue, and before it gets written to the wire, it is added to the dead queue. Disconnect: - Hub Unregister gets called, where the connection is just marked as inactive. And new messages keep getting pushed to the send queue. If it gets full, the channel is closed and the conn gets removed from conn index. Reconnect: - We query the hub for the connection ID, and get back the queues. - We construct a WebConn reusing the old queues, or a fresh one depending on whether the connection ID was found or not. - Now there is a tricky bit here which needs to be carefully processed. On register, we would always send the hello message in the send queue. But we cannot do that now because the send queue might already have messages. Therefore, we don't send the hello message from web hub, if we reuse a connection. Instead, we move that logic to the web conn write pump. We check if the sequence number is in dead queue, and if it is, then we drain the dead queue, and start consuming from the active queue. No hello message is sent here. But if the message does not exist in the dead queue, and the sequence number is actually something that should have existed, then we set a new connction id and clear the dead queue, and send a hello message. The client, on receiving a new connection id will automatically set its sequence number to 0, and make the sync API calls to manage any lost data. 
https://mattermost.atlassian.net/browse/MM-32590 ```release-note NONE ``` * gofmt * Add EnableReliableWebSockets to the client config * Refactoring isInDeadQueue * Passing index to drainDeadQueue * refactoring webconn * fix pointer * review comments * simplify hasMsgLoss * safety comment * fix test * Trigger CI * Trigger CI Co-authored-by: Devin Binnie <devin.binnie@mattermost.com> Co-authored-by: Mattermod <mattermod@users.noreply.github.com>
2021-04-26 10:21:25 -04:00
assert.Len(t, connIndex.All(), 2)
}
// TestReliableWebSocketSend verifies that the ReliableClusterSend broadcast
// flag controls how an event is forwarded to the cluster: events without the
// flag are sent best-effort, while flagged events use reliable delivery.
func TestReliableWebSocketSend(t *testing.T) {
	mainHelper.Parallel(t)
	testCluster := &testlib.FakeClusterInterface{}
	th := SetupWithCluster(t, testCluster)

	// Publish one event without reliable delivery requested...
	unreliableEv := model.NewWebSocketEvent("test_unreliable_event", "", "", "", nil, "")
	unreliableEv = unreliableEv.SetBroadcast(&model.WebsocketBroadcast{})
	th.Service.Publish(unreliableEv)

	// ...and one with ReliableClusterSend set.
	reliableEv := model.NewWebSocketEvent("test_reliable_event", "", "", "", nil, "")
	reliableEv = reliableEv.SetBroadcast(&model.WebsocketBroadcast{
		ReliableClusterSend: true,
	})
	th.Service.Publish(reliableEv)

	unreliableJSON, err := unreliableEv.ToJSON()
	require.NoError(t, err)
	reliableJSON, err := reliableEv.ToJSON()
	require.NoError(t, err)

	// Both events must have been handed to the cluster, each with the
	// send type that matches its broadcast flag.
	messages := testCluster.GetMessages()
	require.Contains(t, messages, &model.ClusterMessage{
		Event:    model.ClusterEventPublish,
		Data:     unreliableJSON,
		SendType: model.ClusterSendBestEffort,
	})
	require.Contains(t, messages, &model.ClusterMessage{
		Event:    model.ClusterEventPublish,
		Data:     reliableJSON,
		SendType: model.ClusterSendReliable,
	})
}
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
func TestHubIsRegistered(t *testing.T) {
mainHelper.Parallel(t)
th := Setup(t).InitBasic(t)
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
session, err := th.Service.CreateSession(th.Context, &model.Session{
UserId: th.BasicUser.Id,
})
require.NoError(t, err)
mockSuite := &platform_mocks.SuiteIFace{}
mockSuite.On("GetSession", session.Token).Return(session, nil)
mockSuite.On("MFARequired", mock.Anything).Return(nil)
th.Suite = mockSuite
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
s := httptest.NewServer(dummyWebsocketHandler(t))
defer s.Close()
wc1 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
wc2 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
wc3 := registerDummyWebConn(t, th, s.Listener.Addr(), session)
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
defer wc1.Close()
defer wc2.Close()
defer wc3.Close()
assert.True(t, th.Service.SessionIsRegistered(*wc1.session.Load()))
assert.True(t, th.Service.SessionIsRegistered(*wc2.session.Load()))
assert.True(t, th.Service.SessionIsRegistered(*wc3.session.Load()))
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
session4, err := th.Service.CreateSession(th.Context, &model.Session{
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
UserId: th.BasicUser2.Id,
})
require.NoError(t, err)
assert.False(t, th.Service.SessionIsRegistered(*session4))
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
}
// TestHubWebConnCount verifies that WebConnCountForUser tracks the number
// of live websocket connections per user, decreases when a connection is
// closed, and reports zero for an unknown user.
func TestHubWebConnCount(t *testing.T) {
	mainHelper.Parallel(t)
	th := Setup(t).InitBasic(t)

	session, err := th.Service.CreateSession(th.Context, &model.Session{
		UserId: th.BasicUser.Id,
	})
	require.NoError(t, err)

	// Stub out the suite so session lookup and MFA checks succeed for the
	// dummy connections below.
	suiteMock := &platform_mocks.SuiteIFace{}
	suiteMock.On("GetSession", session.Token).Return(session, nil)
	suiteMock.On("MFARequired", mock.Anything).Return(nil)
	th.Suite = suiteMock

	srv := httptest.NewServer(dummyWebsocketHandler(t))
	defer srv.Close()

	// Register two connections for the same user.
	firstConn := registerDummyWebConn(t, th, srv.Listener.Addr(), session)
	secondConn := registerDummyWebConn(t, th, srv.Listener.Addr(), session)
	defer firstConn.Close()

	assert.Equal(t, 2, th.Service.WebConnCountForUser(th.BasicUser.Id))

	// Closing one connection should drop the count to one.
	secondConn.Close()
	assert.Equal(t, 1, th.Service.WebConnCountForUser(th.BasicUser.Id))

	// A user with no connections reports zero.
	assert.Equal(t, 0, th.Service.WebConnCountForUser("none"))
}
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
var globalIter iter.Seq[*WebConn]
func BenchmarkHubConnIndexIteratorForUser(b *testing.B) {
th := Setup(b)
connIndex := newHubConnectionIndex(2*time.Second, th.Service.Store, th.Service.logger, false)
// User1
wc1 := &WebConn{
Platform: th.Service,
UserId: model.NewId(),
}
wc1.Active.Store(true)
wc1.SetConnectionID("conn1")
wc1.SetSession(&model.Session{})
// User2
wc2 := &WebConn{
Platform: th.Service,
UserId: model.NewId(),
}
wc2.Active.Store(true)
wc2.SetConnectionID("conn2")
wc2.SetSession(&model.Session{})
wc3 := &WebConn{
Platform: th.Service,
UserId: wc2.UserId,
}
wc3.Active.Store(false)
wc3.SetConnectionID("conn3")
wc3.SetSession(&model.Session{})
require.NoError(b, connIndex.Add(wc1))
require.NoError(b, connIndex.Add(wc2))
require.NoError(b, connIndex.Add(wc3))
b.Run("2 users", func(b *testing.B) {
for b.Loop() {
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
globalIter = connIndex.ForUser(wc2.UserId)
}
})
wc4 := &WebConn{
Platform: th.Service,
UserId: wc2.UserId,
}
wc4.Active.Store(false)
wc4.SetConnectionID("conn4")
wc4.SetSession(&model.Session{})
require.NoError(b, connIndex.Add(wc4))
b.Run("3 users", func(b *testing.B) {
for b.Loop() {
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
globalIter = connIndex.ForUser(wc2.UserId)
}
})
wc5 := &WebConn{
Platform: th.Service,
UserId: wc2.UserId,
}
wc5.Active.Store(false)
wc5.SetConnectionID("conn5")
wc5.SetSession(&model.Session{})
require.NoError(b, connIndex.Add(wc5))
b.Run("4 users", func(b *testing.B) {
for b.Loop() {
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
globalIter = connIndex.ForUser(wc2.UserId)
}
})
}
func BenchmarkHubConnIndexIteratorForChannel(b *testing.B) {
th := Setup(b).InitBasic(b)
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
_, err := th.Service.Store.Channel().SaveMember(th.Context, &model.ChannelMember{
ChannelId: th.BasicChannel.Id,
UserId: th.BasicUser.Id,
NotifyProps: model.GetDefaultChannelNotifyProps(),
SchemeGuest: th.BasicUser.IsGuest(),
SchemeUser: !th.BasicUser.IsGuest(),
})
require.NoError(b, err)
_, err = th.Service.Store.Channel().SaveMember(th.Context, &model.ChannelMember{
ChannelId: th.BasicChannel.Id,
UserId: th.BasicUser2.Id,
NotifyProps: model.GetDefaultChannelNotifyProps(),
SchemeGuest: th.BasicUser2.IsGuest(),
SchemeUser: !th.BasicUser2.IsGuest(),
})
require.NoError(b, err)
connIndex := newHubConnectionIndex(1*time.Second, th.Service.Store, th.Service.logger, true)
// User1
wc1ID := model.NewId()
wc1 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: th.BasicUser.Id,
}
wc1.SetConnectionID(wc1ID)
wc1.SetSession(&model.Session{})
// User2
wc2ID := model.NewId()
wc2 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: th.BasicUser2.Id,
}
wc2.SetConnectionID(wc2ID)
wc2.SetSession(&model.Session{})
wc3ID := model.NewId()
wc3 := &WebConn{
Platform: th.Service,
Suite: th.Suite,
UserId: wc2.UserId,
}
wc3.SetConnectionID(wc3ID)
wc3.SetSession(&model.Session{})
require.NoError(b, connIndex.Add(wc1))
require.NoError(b, connIndex.Add(wc2))
require.NoError(b, connIndex.Add(wc3))
for b.Loop() {
MM-63130: Move to webHub iteration to be alloc-free (#30792) We switch to using iterators introduced in Go 1.23 to make iteration alloc-free and fast. And since element removal is allowed while iterating a map, this also means we don't need to even copy the slice any more. While here, we also address the comment https://github.com/mattermost/mattermost/pull/30178#discussion_r1954862151. I have simply gone back to using []string as the map entry rather than a type alias or a redirection with a struct. https://mattermost.atlassian.net/browse/MM-63130 ```release-note NONE ``` * Changed back nil to len ```release-note NONE ``` * fixing unused assignment ```release-note NONE ``` * add benchmark ``` goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost/server/v8/channels/app/platform cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ HubConnIndexIterator/2_users-8 93.53n ± 1% 38.09n ± 1% -59.27% (p=0.000 n=10) HubConnIndexIterator/3_users-8 106.30n ± 0% 38.41n ± 1% -63.86% (p=0.000 n=10) HubConnIndexIterator/4_users-8 111.30n ± 1% 38.66n ± 1% -65.27% (p=0.000 n=10) geomean 103.4n 38.39n -62.89% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ HubConnIndexIterator/2_users-8 16.00 ± 0% 24.00 ± 0% +50.00% (p=0.000 n=10) HubConnIndexIterator/3_users-8 24.00 ± 0% 24.00 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 32.00 ± 0% 24.00 ± 0% -25.00% (p=0.000 n=10) geomean 23.08 24.00 +4.00% ¹ all samples are equal │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ HubConnIndexIterator/2_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/3_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ HubConnIndexIterator/4_users-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 1.000 1.000 +0.00% ¹ all samples are equal ``` ```release-note NONE ``` * ForChannel test as well ```release-note NONE ``` * review comments ```release-note NONE ``` * fix lint errors ```release-note NONE ``` --------- Co-authored-by: 
Mattermost Build <build@mattermost.com>
2025-05-11 02:30:12 -04:00
globalIter = connIndex.ForChannel(th.BasicChannel.Id)
}
}
// Always run this with -benchtime=0.1s
// See: https://github.com/golang/go/issues/27217.
func BenchmarkHubConnIndex(b *testing.B) {
	th := Setup(b).InitBasic(b)
	connIndex := newHubConnectionIndex(1*time.Second, th.Service.Store, th.Service.logger, false)

	// Two connections belonging to two distinct users.
	first := &WebConn{
		Platform: th.Service,
		Suite:    th.Suite,
		UserId:   model.NewId(),
	}
	second := &WebConn{
		Platform: th.Service,
		Suite:    th.Suite,
		UserId:   model.NewId(),
	}

	b.Run("Add", func(b *testing.B) {
		for b.Loop() {
			require.NoError(b, connIndex.Add(first))
			require.NoError(b, connIndex.Add(second))

			// Cleanup; excluded from the timed region.
			b.StopTimer()
			connIndex.Remove(first)
			connIndex.Remove(second)
			b.StartTimer()
		}
	})

	b.Run("Remove", func(b *testing.B) {
		for b.Loop() {
			// Setup; excluded from the timed region.
			b.StopTimer()
			require.NoError(b, connIndex.Add(first))
			require.NoError(b, connIndex.Add(second))
			b.StartTimer()

			connIndex.Remove(first)
			connIndex.Remove(second)
		}
	})
}
// TestHubConnIndexRemoveMemLeak verifies that removing a WebConn from the
// connection index drops every reference to it, so the connection becomes
// garbage-collectable and the byConnection map does not leak entries.
func TestHubConnIndexRemoveMemLeak(t *testing.T) {
	mainHelper.Parallel(t)
	th := Setup(t)
	connIndex := newHubConnectionIndex(1*time.Second, th.Service.Store, th.Service.logger, false)
	wc := &WebConn{
		Platform: th.Service,
		Suite:    th.Suite,
	}
	wc.SetConnectionID(model.NewId())
	wc.SetSession(&model.Session{})
	// The finalizer can only run once wc is unreachable; closing ch signals
	// that the index (and everything else) released its reference.
	ch := make(chan struct{})
	runtime.SetFinalizer(wc, func(*WebConn) {
		close(ch)
	})
	err := connIndex.Add(wc)
	require.NoError(t, err)
	connIndex.Remove(wc)
	// Force a collection cycle, then wait (bounded) for the finalizer to fire.
	runtime.GC()
	timer := time.NewTimer(3 * time.Second)
	defer timer.Stop()
	select {
	case <-ch:
	case <-timer.C:
		require.Fail(t, "timeout waiting for collection of wc")
	}
	// The per-connection map must also be empty after removal.
	assert.Len(t, connIndex.byConnection, 0)
}
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
var hubSink *Hub
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
func BenchmarkGetHubForUserId(b *testing.B) {
th := Setup(b).InitBasic(b)
err := th.Service.Start(nil)
require.NoError(b, err)
for b.Loop() {
hubSink = th.Service.GetHubForUserId(th.BasicUser.Id)
MM-24972: make GetHubForUserId lock-less and zero-alloc (#14945) * MM-24972: make GetHubForUserId lock-less and zero-alloc The hubs aren't meant to be modified after the server starts up. But the key problem was the SetHubs method which would zero out the underlying hubs slice. This wasn't ideally necessary because an hub would only be stopped once during server shutdown. However, this was required by a test which would shutdown the hub twice. And since the hub shutdown isn't idempotent, zeroing the slice would just skip over shutting down the hub again. To improve the overall situation, we apply several optimizations. - We use the new hash/maphash package which exposes Go runtime's internal hash algorithms to be used as a package. This is much faster than hash/fnv. - We move around the initialization of the hub to happen before the metrics server starts. This allows us to initialize the hub before any of the hub elements are being accessed. - To make the test run successfully, we do not call th.TearDown. This is fine for a test, because anyways the test process would eventually stop and relinquish the resources to the OS. This allows us to completely remove any mutexes and thereby we can remove all the methods and any edge-case checks related to index being out of bounds. As a result, the fast path becomes very straightforward and zero-alloc. name old time/op new time/op delta GetHubForUserId-8 116ns ± 1% 38ns ± 7% -67.22% (p=0.000 n=10+10) name old alloc/op new alloc/op delta GetHubForUserId-8 36.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta GetHubForUserId-8 2.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) Manually tested with some load testing and running Hub tests in -race mode. * remove mutex * incorporate review comments
2020-07-07 01:53:45 -04:00
}
}
// TestClusterBroadcast verifies that broadcast metadata attached to a
// websocket event is serialized into the cluster message intact.
func TestClusterBroadcast(t *testing.T) {
	mainHelper.Parallel(t)

	fakeCluster := &testlib.FakeClusterInterface{}
	th := SetupWithCluster(t, fakeCluster)

	broadcast := &model.WebsocketBroadcast{
		ContainsSanitizedData: true,
		ContainsSensitiveData: true,
	}
	event := model.NewWebSocketEvent("test_event", "", "", "", nil, "").SetBroadcast(broadcast)
	th.Service.Publish(event)

	// Decode the raw cluster payload and confirm the broadcast round-tripped.
	var decoded struct {
		Event     string                    `json:"event"`
		Data      map[string]any            `json:"data"`
		Broadcast *model.WebsocketBroadcast `json:"broadcast"`
		Sequence  int64                     `json:"seq"`
	}
	msgs := fakeCluster.GetMessages()
	err := json.Unmarshal(msgs[0].Data, &decoded)
	require.NoError(t, err)
	require.Equal(t, decoded.Broadcast, broadcast)
}
// TestClusterBroadcastHooks verifies how broadcast hook IDs and arguments
// are carried across cluster messages.
func TestClusterBroadcastHooks(t *testing.T) {
	mainHelper.Parallel(t)

	t.Run("should send broadcast hook information across cluster", func(t *testing.T) {
		fakeCluster := &testlib.FakeClusterInterface{}
		th := SetupWithCluster(t, fakeCluster)

		hookID := broadcastTest
		hookArgs := map[string]any{
			"makes_changes": true,
		}

		event := model.NewWebSocketEvent(model.WebsocketEventPosted, "", "", "", nil, "")
		event.GetBroadcast().AddHook(hookID, hookArgs)
		th.Service.Publish(event)

		// The hook ID and args must appear on the event decoded from the
		// cluster message.
		received, err := model.WebSocketEventFromJSON(bytes.NewReader(fakeCluster.GetMessages()[0].Data))
		require.NoError(t, err)
		assert.Equal(t, []string{hookID}, received.GetBroadcast().BroadcastHooks)
		assert.Equal(t, []map[string]any{hookArgs}, received.GetBroadcast().BroadcastHookArgs)
	})

	t.Run("should not preserve type information for args", func(t *testing.T) {
		// This behaviour isn't ideal, but this test confirms that it hasn't changed
		fakeCluster := &testlib.FakeClusterInterface{}
		th := SetupWithCluster(t, fakeCluster)

		hookID := "test_broadcast_hook_with_args"
		hookArgs := map[string]any{
			"user":  &model.User{Id: "user1"},
			"array": []string{"a", "b", "c"},
		}

		event := model.NewWebSocketEvent(model.WebsocketEventPosted, "", "", "", nil, "")
		event.GetBroadcast().AddHook(hookID, hookArgs)
		th.Service.Publish(event)

		// JSON round-tripping collapses typed args into generic maps/slices.
		received, err := model.WebSocketEventFromJSON(bytes.NewReader(fakeCluster.GetMessages()[0].Data))
		require.NoError(t, err)
		assert.Equal(t, []string{hookID}, received.GetBroadcast().BroadcastHooks)
		assert.IsType(t, map[string]any{}, received.GetBroadcast().BroadcastHookArgs[0]["user"])
		assert.IsType(t, []any{}, received.GetBroadcast().BroadcastHookArgs[0]["array"])
	})
}