MM-61886: Add actionable page navigation metrics (#29332)

Page load is one of the metrics that we track and present
to MLT. However, in its current form, it is not very
actionable because it also includes network latency.

We split the whole metric into these phases of the navigation timeline:
startTime
|
responseStart = TTFB
|
responseEnd = TTLB
|
domInteractive = Start of processing phase
|
loadEventEnd = Load complete

This gives us better visibility into exactly
which phase of the load process is slow.
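For reference, a minimal sketch of where these values come from on the client, assuming the standard PerformanceNavigationTiming API (the actual reporting goes through the webapp's performance_telemetry utilities):

```typescript
// Minimal sketch: the phases above map to fields on the browser's
// PerformanceNavigationTiming entry; all values are milliseconds
// relative to startTime (which is 0 for the navigation entry).
const [nav] = performance.getEntriesByType('navigation') as PerformanceNavigationTiming[];

if (nav) {
    const phases = {
        ttfb: nav.responseStart, // first byte of the response received
        ttlb: nav.responseEnd, // last byte of the response received
        domInteractive: nav.domInteractive, // start of the processing phase
        pageLoad: nav.duration, // loadEventEnd - startTime, i.e. load complete
    };
    console.log(phases);
}
```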

I have experimented with other metrics like
- domContentLoadedEventStart
- domContentLoadedEventEnd
- domComplete

and observed that the gaps between them and the
neighboring milestones are too small to add any signal.
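For context, this is roughly how those gaps can be eyeballed in a devtools console; the conclusion above is specific to what we observed for our pages:

```typescript
// Rough sketch: print the gaps between the DOM milestones that were
// considered but dropped. Run in the devtools console after the page loads.
const [nav] = performance.getEntriesByType('navigation') as PerformanceNavigationTiming[];

if (nav) {
    console.table({
        'domInteractive -> domContentLoadedEventStart': nav.domContentLoadedEventStart - nav.domInteractive,
        'domContentLoadedEventStart -> domContentLoadedEventEnd': nav.domContentLoadedEventEnd - nav.domContentLoadedEventStart,
        'domContentLoadedEventEnd -> domComplete': nav.domComplete - nav.domContentLoadedEventEnd,
        'domComplete -> loadEventEnd': nav.loadEventEnd - nav.domComplete,
    });
}
```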

Additionally, I have moved TTFB from being a
web-vitals metric to being tracked from the navigation
timing entry, to remain consistent with how the other
navigation metrics are measured.
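In rough terms, the change looks like this (a sketch, not the exact reporter code; the metric names match the Measure values added in this commit):

```typescript
// Before (sketch): TTFB came in through the web-vitals callback.
// onTTFB((metric) => this.handleWebVital(metric));

// After (sketch): TTFB is read from the same navigation entry as the other
// navigation metrics, so they share one timestamp and one code path.
const [nav] = performance.getEntriesByType('navigation') as PerformanceNavigationTiming[];
const ts = Date.now();

const histogramMeasures = nav ? [
    {metric: 'TTFB', value: nav.responseStart, timestamp: ts},
    {metric: 'TTLB', value: nav.responseEnd, timestamp: ts},
] : [];
```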

Lastly, I took this chance to improve some of the
validation errors that we return so that they include
more context about the input that was passed and why
it failed.

This also meant that I had to change the tests
to check for error strings rather than error
values, which is not ideal, but I don't think
it's worth the effort to define named error
variables for all of them.

https://mattermost.atlassian.net/browse/MM-61886

```release-note
NONE
```
Agniva De Sarker 2024-11-29 11:24:35 +05:30 committed by GitHub
parent b33622e32c
commit 4ec4b4d525
12 changed files with 138 additions and 36 deletions

View file

@ -28,6 +28,15 @@ func (a *App) RegisterPerformanceReport(rctx request.CTX, report *model.Performa
switch h.Metric {
case model.ClientTimeToFirstByte:
a.Metrics().ObserveClientTimeToFirstByte(commonLabels["platform"], commonLabels["agent"], h.Value/1000)
case model.ClientTimeToLastByte:
a.Metrics().ObserveClientTimeToLastByte(commonLabels["platform"], commonLabels["agent"], h.Value/1000)
case model.ClientTimeToDOMInteractive:
a.Metrics().ObserveClientTimeToDomInteractive(commonLabels["platform"], commonLabels["agent"], h.Value/1000)
case model.ClientSplashScreenEnd:
a.Metrics().ObserveClientSplashScreenEnd(commonLabels["platform"],
commonLabels["agent"],
h.GetLabelValue("page_type", model.AcceptedSplashScreenOrigins, "team_controller"),
h.Value/1000)
case model.ClientFirstContentfulPaint:
a.Metrics().ObserveClientFirstContentfulPaint(commonLabels["platform"], commonLabels["agent"], h.Value/1000)
case model.ClientLargestContentfulPaint:

View file

@ -105,6 +105,9 @@ type MetricsInterface interface {
IncrementNotificationUnsupportedCounter(notificationType model.NotificationType, notSentReason model.NotificationReason, platform string)
ObserveClientTimeToFirstByte(platform, agent string, elapsed float64)
ObserveClientTimeToLastByte(platform, agent string, elapsed float64)
ObserveClientTimeToDomInteractive(platform, agent string, elapsed float64)
ObserveClientSplashScreenEnd(platform, agent, pageType string, elapsed float64)
ObserveClientFirstContentfulPaint(platform, agent string, elapsed float64)
ObserveClientLargestContentfulPaint(platform, agent, region string, elapsed float64)
ObserveClientInteractionToNextPaint(platform, agent, interaction string, elapsed float64)

View file

@ -338,16 +338,31 @@ func (_m *MetricsInterface) ObserveClientRHSLoadDuration(platform string, agent
_m.Called(platform, agent, elapsed)
}
// ObserveClientSplashScreenEnd provides a mock function with given fields: platform, agent, pageType, elapsed
func (_m *MetricsInterface) ObserveClientSplashScreenEnd(platform string, agent string, pageType string, elapsed float64) {
_m.Called(platform, agent, pageType, elapsed)
}
// ObserveClientTeamSwitchDuration provides a mock function with given fields: platform, agent, fresh, elapsed
func (_m *MetricsInterface) ObserveClientTeamSwitchDuration(platform string, agent string, fresh string, elapsed float64) {
_m.Called(platform, agent, fresh, elapsed)
}
// ObserveClientTimeToDomInteractive provides a mock function with given fields: platform, agent, elapsed
func (_m *MetricsInterface) ObserveClientTimeToDomInteractive(platform string, agent string, elapsed float64) {
_m.Called(platform, agent, elapsed)
}
// ObserveClientTimeToFirstByte provides a mock function with given fields: platform, agent, elapsed
func (_m *MetricsInterface) ObserveClientTimeToFirstByte(platform string, agent string, elapsed float64) {
_m.Called(platform, agent, elapsed)
}
// ObserveClientTimeToLastByte provides a mock function with given fields: platform, agent, elapsed
func (_m *MetricsInterface) ObserveClientTimeToLastByte(platform string, agent string, elapsed float64) {
_m.Called(platform, agent, elapsed)
}
// ObserveClusterRequestDuration provides a mock function with given fields: elapsed
func (_m *MetricsInterface) ObserveClusterRequestDuration(elapsed float64) {
_m.Called(elapsed)

View file

@ -202,6 +202,9 @@ type MetricsInterfaceImpl struct {
NotificationUnsupportedCounters *prometheus.CounterVec
ClientTimeToFirstByte *prometheus.HistogramVec
ClientTimeToLastByte *prometheus.HistogramVec
ClientTimeToDOMInteractive *prometheus.HistogramVec
ClientSplashScreenEnd *prometheus.HistogramVec
ClientFirstContentfulPaint *prometheus.HistogramVec
ClientLargestContentfulPaint *prometheus.HistogramVec
ClientInteractionToNextPaint *prometheus.HistogramVec
@ -1187,6 +1190,39 @@ func New(ps *platform.PlatformService, driver, dataSource string) *MetricsInterf
)
m.Registry.MustRegister(m.ClientTimeToFirstByte)
m.ClientTimeToLastByte = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: MetricsNamespace,
Subsystem: MetricsSubsystemClientsWeb,
Name: "time_to_last_byte",
Help: "Duration from when a browser starts to request a page from a server until when it receives the last byte of the resource or immediately before the transport connection is closed, whichever comes first. (seconds)",
},
[]string{"platform", "agent"},
)
m.Registry.MustRegister(m.ClientTimeToLastByte)
m.ClientTimeToDOMInteractive = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: MetricsNamespace,
Subsystem: MetricsSubsystemClientsWeb,
Name: "dom_interactive",
Help: "Duration from when a browser starts to request a page from a server until when it sets the document's readyState to interactive. (seconds)",
},
[]string{"platform", "agent"},
)
m.Registry.MustRegister(m.ClientTimeToDOMInteractive)
m.ClientSplashScreenEnd = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: MetricsNamespace,
Subsystem: MetricsSubsystemClientsWeb,
Name: "splash_screen",
Help: "Duration from when a browser starts to request a page from a server until when the splash screen ends. (seconds)",
},
[]string{"platform", "agent", "page_type"},
)
m.Registry.MustRegister(m.ClientSplashScreenEnd)
m.ClientFirstContentfulPaint = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: MetricsNamespace,
@ -1838,6 +1874,18 @@ func (mi *MetricsInterfaceImpl) ObserveClientTimeToFirstByte(platform, agent str
mi.ClientTimeToFirstByte.With(prometheus.Labels{"platform": platform, "agent": agent}).Observe(elapsed)
}
func (mi *MetricsInterfaceImpl) ObserveClientTimeToLastByte(platform, agent string, elapsed float64) {
mi.ClientTimeToLastByte.With(prometheus.Labels{"platform": platform, "agent": agent}).Observe(elapsed)
}
func (mi *MetricsInterfaceImpl) ObserveClientTimeToDomInteractive(platform, agent string, elapsed float64) {
mi.ClientTimeToDOMInteractive.With(prometheus.Labels{"platform": platform, "agent": agent}).Observe(elapsed)
}
func (mi *MetricsInterfaceImpl) ObserveClientSplashScreenEnd(platform, agent, pageType string, elapsed float64) {
mi.ClientSplashScreenEnd.With(prometheus.Labels{"platform": platform, "agent": agent, "page_type": pageType}).Observe(elapsed)
}
func (mi *MetricsInterfaceImpl) ObserveClientFirstContentfulPaint(platform, agent string, elapsed float64) {
mi.ClientFirstContentfulPaint.With(prometheus.Labels{"platform": platform, "agent": agent}).Observe(elapsed)
}

View file

@ -6,7 +6,6 @@ package model
import (
"fmt"
"strings"
"time"
"github.com/blang/semver/v4"
)
@ -15,6 +14,9 @@ type MetricType string
const (
ClientTimeToFirstByte MetricType = "TTFB"
ClientTimeToLastByte MetricType = "TTLB"
ClientTimeToDOMInteractive MetricType = "dom_interactive"
ClientSplashScreenEnd MetricType = "splash_screen"
ClientFirstContentfulPaint MetricType = "FCP"
ClientLargestContentfulPaint MetricType = "LCP"
ClientInteractionToNextPaint MetricType = "INP"
@ -54,7 +56,8 @@ var (
"modal_content",
"other",
)
AcceptedTrueFalseLabels = sliceToMapKey("true", "false")
AcceptedTrueFalseLabels = sliceToMapKey("true", "false")
AcceptedSplashScreenOrigins = sliceToMapKey("root", "team_controller")
)
type MetricSample struct {
@ -86,7 +89,7 @@ func (r *PerformanceReport) IsValid() error {
reportVersion, err := semver.ParseTolerant(r.Version)
if err != nil {
return err
return fmt.Errorf("could not parse semver version: %s, %w", r.Version, err)
}
if reportVersion.Major != performanceReportVersion.Major || reportVersion.Minor > performanceReportVersion.Minor {
@ -94,12 +97,12 @@ func (r *PerformanceReport) IsValid() error {
}
if r.Start > r.End {
return fmt.Errorf("report timestamps are erroneous")
return fmt.Errorf("report timestamps are erroneous: start_timestamp %f is greater than end_timestamp %f", r.Start, r.End)
}
now := time.Now().UnixMilli()
now := GetMillis()
if r.End < float64(now-performanceReportTTLMilliseconds) {
return fmt.Errorf("report is outdated: %f", r.End)
return fmt.Errorf("report is outdated: end_time %f is past %d ms from now", r.End, performanceReportTTLMilliseconds)
}
return nil

View file

@ -4,7 +4,6 @@
package model
import (
"fmt"
"testing"
"time"
@ -16,7 +15,7 @@ func TestPerformanceReport_IsValid(t *testing.T) {
tests := []struct {
name string
report *PerformanceReport
expected error
expected string
}{
{
name: "ValidReport",
@ -26,12 +25,12 @@ func TestPerformanceReport_IsValid(t *testing.T) {
Start: float64(time.Now().UnixMilli() - 10000),
End: float64(time.Now().UnixMilli()),
},
expected: nil,
expected: "",
},
{
name: "NilReport",
report: nil,
expected: fmt.Errorf("the report is nil"),
expected: "the report is nil",
},
{
name: "UnsupportedVersion",
@ -41,7 +40,7 @@ func TestPerformanceReport_IsValid(t *testing.T) {
Start: float64(time.Now().UnixMilli() - 10000),
End: float64(time.Now().UnixMilli()),
},
expected: fmt.Errorf("report version is not supported: server version: 0.1.0, report version: 2.0.0"),
expected: "report version is not supported:",
},
{
name: "ErroneousTimestamps",
@ -51,7 +50,7 @@ func TestPerformanceReport_IsValid(t *testing.T) {
Start: float64(time.Now().UnixMilli()),
End: float64(time.Now().Add(-1 * time.Hour).UnixMilli()),
},
expected: fmt.Errorf("report timestamps are erroneous"),
expected: "report timestamps are erroneous",
},
{
name: "OutdatedReport",
@ -61,15 +60,15 @@ func TestPerformanceReport_IsValid(t *testing.T) {
Start: float64(time.Now().Add(-7 * time.Minute).UnixMilli()),
End: float64(outdatedTimestamp),
},
expected: fmt.Errorf("report is outdated: %f", float64(outdatedTimestamp)),
expected: "report is outdated:",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.report.IsValid()
if tt.expected != nil {
require.EqualError(t, err, tt.expected.Error())
if tt.expected != "" {
require.Contains(t, err.Error(), tt.expected)
return
}
require.NoError(t, err)

View file

@ -1,6 +1,7 @@
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.
import {Measure, measureAndReport} from 'utils/performance_telemetry';
import {isDesktopApp} from 'utils/user_agent';
const ANIMATION_CLASS_FOR_MATTERMOST_LOGO_HIDE = 'LoadingAnimation__compass-shrink';
@ -99,7 +100,7 @@ export class InitialLoadingScreenClass {
/**
* The loading animations are always started as soon as the loading indicator is shown in the screen for the first time.
* But we still want to have this start method incase we need to start the loading animations manually any time.
* But we still want to have this start method in case we need to start the loading animations manually any time.
* If we do want to do that then we should remove the set timeout destroy call doing above.
*/
public start() {
@ -115,7 +116,7 @@ export class InitialLoadingScreenClass {
this.loadingAnimationElement.className = LOADING_CLASS_FOR_ANIMATION;
}
public stop() {
public stop(pageType: string) {
if (!this.loadingScreenElement || !this.loadingAnimationElement) {
return;
}
@ -124,6 +125,15 @@ export class InitialLoadingScreenClass {
this.loadingScreenElement.className = LOADING_COMPLETE_CLASS_FOR_SCREEN;
this.loadingAnimationElement.className = LOADING_COMPLETE_CLASS_FOR_ANIMATION;
measureAndReport({
name: Measure.SplashScreen,
startMark: 0,
canFail: false,
labels: {
page_type: pageType,
},
});
}
}

View file

@ -293,7 +293,7 @@ export default class Root extends React.PureComponent<Props, State> {
if (prevState.shouldMountAppRoutes === false && this.state.shouldMountAppRoutes === true) {
if (!doesRouteBelongToTeamControllerRoutes(this.props.location.pathname)) {
DesktopApp.reactAppInitialized();
InitialLoadingScreen.stop();
InitialLoadingScreen.stop('root');
}
}
}

View file

@ -55,7 +55,7 @@ function TeamController(props: Props) {
useTelemetryIdentitySync();
useEffect(() => {
InitialLoadingScreen.stop();
InitialLoadingScreen.stop('team_controller');
DesktopApp.reactAppInitialized();
async function fetchAllChannels() {
await props.fetchAllMyTeamsChannels();

View file

@ -13,6 +13,10 @@ export const enum Measure {
ChannelSwitch = 'channel_switch',
GlobalThreadsLoad = 'global_threads_load',
PageLoad = 'page_load',
TTFB = 'TTFB',
TTLB = 'TTLB',
SplashScreen = 'splash_screen',
DomInteractive = 'dom_interactive',
RhsLoad = 'rhs_load',
TeamSwitch = 'team_switch',
}
@ -29,7 +33,7 @@ export function markAndReport(name: string): PerformanceMark {
* Measures the duration between two performance marks, schedules it to be reported to the server, and returns the
* PerformanceMeasure created by doing this. If endMark is omitted, the measure will measure the duration until now.
*
* If either the start or end mark does not exist, undefined will be returned and, if canFail is false, an error
* If either the start or end mark does not exist, undefined will be returned, and if canFail is false, an error
* will be logged.
*/
export function measureAndReport({
@ -40,8 +44,8 @@ export function measureAndReport({
canFail = false,
}: {
name: string;
startMark: string;
endMark?: string;
startMark: string | DOMHighResTimeStamp;
endMark?: string | DOMHighResTimeStamp;
labels?: Record<string, string>;
canFail?: boolean;
}): PerformanceMeasure | undefined {

View file

@ -2,7 +2,7 @@
// See LICENSE.txt for license information.
import nock from 'nock';
import {onCLS, onFCP, onINP, onLCP, onTTFB} from 'web-vitals/attribution';
import {onCLS, onFCP, onINP, onLCP} from 'web-vitals/attribution';
import {Client4} from '@mattermost/client';
@ -216,8 +216,6 @@ describe.skip('PerformanceReporter', () => {
onINPCallback({name: 'INP', value: 200});
const onLCPCallback = (onLCP as jest.Mock).mock.calls[0][0];
onLCPCallback({name: 'LCP', value: 2500, entries: []});
const onTTFBCallback = (onTTFB as jest.Mock).mock.calls[0][0];
onTTFBCallback({name: 'TTFB', value: 800});
await waitForReport();
@ -234,10 +232,6 @@ describe.skip('PerformanceReporter', () => {
metric: 'LCP',
value: 2500,
},
{
metric: 'TTFB',
value: 800,
},
],
});

View file

@ -2,7 +2,7 @@
// See LICENSE.txt for license information.
import type {Store} from 'redux';
import {onCLS, onFCP, onINP, onLCP, onTTFB} from 'web-vitals/attribution';
import {onCLS, onFCP, onINP, onLCP} from 'web-vitals/attribution';
import type {INPMetricWithAttribution, LCPMetricWithAttribution, Metric} from 'web-vitals/attribution';
import type {Client4} from '@mattermost/client';
@ -114,16 +114,15 @@ export default class PerformanceReporter {
entryTypes: observedEntryTypes,
});
// Record the page load separately because it arrived before we were observing and because you can't use
// Record the page navigation separately because it arrived before we were observing and because you can't use
// the buffered option for PerformanceObserver with multiple entry types.
this.measurePageLoad();
this.measurePageNavigation();
// Register handlers for standard metrics and Web Vitals
onCLS((metric) => this.handleWebVital(metric));
onFCP((metric) => this.handleWebVital(metric));
onINP((metric) => this.handleWebVital(metric));
onLCP((metric) => this.handleWebVital(metric));
onTTFB((metric) => this.handleWebVital(metric));
// Periodically send performance telemetry to the server, roughly every minute but with some randomness to
// avoid overloading the server every minute.
@ -138,17 +137,35 @@ export default class PerformanceReporter {
}
}
private measurePageLoad() {
private measurePageNavigation() {
const entries = performance.getEntriesByType('navigation');
if (entries.length === 0) {
return;
}
const entry = entries[0];
const ts = Date.now();
this.histogramMeasures.push({
metric: Measure.TTFB,
value: entry.responseStart,
timestamp: ts,
});
this.histogramMeasures.push({
metric: Measure.TTLB,
value: entry.responseEnd,
timestamp: ts,
});
this.histogramMeasures.push({
metric: Measure.DomInteractive,
value: entry.domInteractive,
timestamp: ts,
});
this.histogramMeasures.push({
metric: Measure.PageLoad,
value: entries[0].duration,
timestamp: Date.now(),
value: entry.duration,
timestamp: ts,
});
}