mattermost/webapp/scripts/check-external-links.mjs
Jesse Hallam 5aefff30cf
Some checks are pending
API / build (push) Waiting to run
Server CI / Compute Go Version (push) Waiting to run
Server CI / Check mocks (push) Blocked by required conditions
Server CI / Check go mod tidy (push) Blocked by required conditions
Server CI / check-style (push) Blocked by required conditions
Server CI / Check serialization methods for hot structs (push) Blocked by required conditions
Server CI / Vet API (push) Blocked by required conditions
Server CI / Check migration files (push) Blocked by required conditions
Server CI / Generate email templates (push) Blocked by required conditions
Server CI / Check store layers (push) Blocked by required conditions
Server CI / Check mmctl docs (push) Blocked by required conditions
Server CI / Postgres with binary parameters (push) Blocked by required conditions
Server CI / Postgres (push) Blocked by required conditions
Server CI / Postgres (FIPS) (push) Blocked by required conditions
Server CI / Generate Test Coverage (push) Blocked by required conditions
Server CI / Run mmctl tests (push) Blocked by required conditions
Server CI / Run mmctl tests (FIPS) (push) Blocked by required conditions
Server CI / Build mattermost server app (push) Blocked by required conditions
Web App CI / check-lint (push) Waiting to run
Web App CI / check-i18n (push) Blocked by required conditions
Web App CI / check-external-links (push) Blocked by required conditions
Web App CI / check-types (push) Blocked by required conditions
Web App CI / test (platform) (push) Blocked by required conditions
Web App CI / test (mattermost-redux) (push) Blocked by required conditions
Web App CI / test (channels shard 1/4) (push) Blocked by required conditions
Web App CI / test (channels shard 2/4) (push) Blocked by required conditions
Web App CI / test (channels shard 3/4) (push) Blocked by required conditions
Web App CI / test (channels shard 4/4) (push) Blocked by required conditions
Web App CI / upload-coverage (push) Blocked by required conditions
Web App CI / build (push) Blocked by required conditions
Add CI check for broken mattermost.com links in webapp (#35093)
* Add CI check for broken mattermost.com links in webapp

Add a script that scans the webapp source files for links to mattermost.com
domains and tests each unique URL for 404s. This helps detect broken
documentation and marketing links early.

- New script: webapp/scripts/check-external-links.mjs
- New npm target: check-external-links
- New CI job in webapp-ci.yml to run on every commit

* Add --markdown flag for GitHub Actions job summary

* Fix job summary: use pipefail and suppress progress output

* Require mattermost.com links to use /pl/ permalink format

* Require all mattermost.com links (including subdomains) to use /pl/

* Allow exceptions for push servers and root domain

* Make non-permalink URLs warnings instead of errors

* Add User-Agent header and retry GET on 403

* Follow redirects when checking URLs

Check the final destination of redirects to catch broken links that
redirect to error pages. If a redirect response has the Cloudflare
cf-mitigated header, assume the URL is OK and stop following.

* Simplify link checker code

- Combine PUSH_SERVER_PATTERN and HPNS_PATTERN into single regex
- Simplify validatePermalink to return boolean (reason was unused)
- Consolidate Cloudflare header checks in processResponse

* replace broken links with valid ones

* updates
2026-03-11 17:43:08 -04:00

404 lines
12 KiB
JavaScript
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.
/* eslint-disable no-console */
import fs from 'node:fs';
import path from 'node:path';
import chalk from 'chalk';
const MATTERMOST_URL_PATTERN = /https?:\/\/(?:[a-z0-9-]+\.)*mattermost\.com(?:[/?#][^"'\s<>()]*)?/gi;
const PERMALINK_PATTERN = /^https?:\/\/(www\.)?mattermost\.com\/pl\//;
const PUSH_SERVER_PATTERN = /^https?:\/\/(([a-z0-9-]+\.)?push|hpns-[a-z]+)\.mattermost\.com(\/|$)/;
const ROOT_DOMAIN_PATTERN = /^https?:\/\/(www\.)?mattermost\.com\/?$/;
const SOURCE_EXTENSIONS = ['.ts', '.tsx', '.js', '.jsx'];
const DIRECTORIES_TO_SCAN = [
'channels/src',
'platform/client/src',
'platform/components/src',
'platform/mattermost-redux/src',
];
function getAllSourceFiles(dir, excludeTests = true) {
const files = [];
function walk(currentDir) {
if (!fs.existsSync(currentDir)) {
return;
}
const entries = fs.readdirSync(currentDir, {withFileTypes: true});
for (const entry of entries) {
const fullPath = path.join(currentDir, entry.name);
if (entry.isDirectory()) {
if (entry.name === 'node_modules' || entry.name === 'dist' || entry.name === 'coverage') {
continue;
}
walk(fullPath);
} else if (entry.isFile()) {
const ext = path.extname(entry.name);
if (!SOURCE_EXTENSIONS.includes(ext)) {
continue;
}
if (excludeTests && (entry.name.includes('.test.') || entry.name.includes('.spec.'))) {
continue;
}
files.push(fullPath);
}
}
}
walk(dir);
return files;
}
function extractUrls(filePath) {
const content = fs.readFileSync(filePath, 'utf-8');
const matches = content.match(MATTERMOST_URL_PATTERN) || [];
return matches.map((url) => {
let cleanUrl = url;
cleanUrl = cleanUrl.replace(/[',;)}\]]+$/, '');
cleanUrl = cleanUrl.replace(/\\n$/, '');
return cleanUrl;
});
}
function findAllMattermostUrls(rootDir, excludeTests = true) {
const urlMap = new Map();
for (const dir of DIRECTORIES_TO_SCAN) {
const fullDir = path.join(rootDir, dir);
const files = getAllSourceFiles(fullDir, excludeTests);
for (const file of files) {
const urls = extractUrls(file);
for (const url of urls) {
if (!urlMap.has(url)) {
urlMap.set(url, []);
}
urlMap.get(url).push(path.relative(rootDir, file));
}
}
}
return urlMap;
}
function isValidPermalink(url) {
return PERMALINK_PATTERN.test(url) ||
ROOT_DOMAIN_PATTERN.test(url) ||
PUSH_SERVER_PATTERN.test(url);
}
function findNonPermalinkUrls(urlMap) {
const invalid = [];
for (const [url, files] of urlMap) {
if (!isValidPermalink(url)) {
invalid.push({url, files});
}
}
return invalid;
}
const FETCH_HEADERS = {
'User-Agent': 'Mozilla/5.0 (compatible; MattermostLinkChecker/1.0; +https://github.com/mattermost/mattermost)',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
};
const MAX_REDIRECTS = 10;
async function checkUrl(url, retries = 2) {
for (let attempt = 0; attempt <= retries; attempt++) {
try {
return await checkUrlWithRedirects(url);
} catch (error) {
if (attempt === retries) {
return {
url,
status: 0,
ok: false,
error: error.message,
};
}
await new Promise((resolve) => setTimeout(resolve, 1000 * (attempt + 1)));
}
}
}
async function checkUrlWithRedirects(originalUrl) {
let currentUrl = originalUrl;
for (let redirectCount = 0; redirectCount <= MAX_REDIRECTS; redirectCount++) {
const response = await fetch(currentUrl, {
method: 'HEAD',
redirect: 'manual',
headers: FETCH_HEADERS,
signal: AbortSignal.timeout(10000),
});
if (response.status === 405 || response.status === 403) {
const getResponse = await fetch(currentUrl, {
method: 'GET',
redirect: 'manual',
headers: FETCH_HEADERS,
signal: AbortSignal.timeout(10000),
});
return processResponse(originalUrl, getResponse);
}
const result = processResponse(originalUrl, response);
if (!result.redirect) {
return result;
}
const location = response.headers.get('location');
if (!location) {
return {
url: originalUrl,
status: response.status,
ok: false,
error: 'Redirect without Location header',
};
}
currentUrl = new URL(location, currentUrl).href;
}
return {
url: originalUrl,
status: 0,
ok: false,
error: 'Too many redirects',
};
}
function processResponse(originalUrl, response) {
const {status} = response;
const cfHeader = response.headers.get('cf-mitigated');
const isRedirect = status >= 300 && status < 400;
if (cfHeader && (isRedirect || status === 403 || status === 503)) {
return {url: originalUrl, status, ok: true};
}
if (status === 301) {
return {url: originalUrl, status, ok: true};
}
if (isRedirect) {
return {url: originalUrl, status, redirect: true};
}
if (response.ok) {
return {url: originalUrl, status, ok: true};
}
return {url: originalUrl, status, ok: false};
}
function shouldSkipUrlCheck(url) {
return PUSH_SERVER_PATTERN.test(url);
}
async function checkUrls(urls, concurrency = 5, silentProgress = false) {
const results = [];
const urlList = Array.from(urls.keys()).filter((url) => !shouldSkipUrlCheck(url));
for (let i = 0; i < urlList.length; i += concurrency) {
const batch = urlList.slice(i, i + concurrency);
const batchResults = await Promise.all(batch.map((url) => checkUrl(url)));
results.push(...batchResults);
if (!silentProgress) {
const completed = Math.min(i + concurrency, urlList.length);
process.stderr.write(`\rChecking URLs: ${completed}/${urlList.length}`);
}
}
if (!silentProgress) {
process.stderr.write('\n');
}
return results;
}
function printResults(results, urlMap, nonPermalinkUrls) {
const broken = results.filter((r) => !r.ok);
const working = results.filter((r) => r.ok);
console.log('\n' + chalk.bold('=== External Link Check Results ===\n'));
console.log(chalk.green(`${working.length} URLs are accessible`));
if (broken.length > 0) {
console.log(chalk.red(`${broken.length} URLs are broken`));
}
if (nonPermalinkUrls.length > 0) {
console.log(chalk.yellow(`${nonPermalinkUrls.length} URLs are not using permalink format (warning)`));
}
console.log();
if (broken.length > 0) {
console.log(chalk.red.bold('Broken URLs:\n'));
for (const result of broken) {
const statusText = result.error ? `Error: ${result.error}` : `HTTP ${result.status}`;
console.log(chalk.red(` ${result.url}`));
console.log(chalk.gray(` Status: ${statusText}`));
console.log(chalk.gray(` Found in:`));
for (const file of urlMap.get(result.url)) {
console.log(chalk.gray(` - ${file}`));
}
console.log();
}
}
if (nonPermalinkUrls.length > 0) {
console.log(chalk.yellow.bold('URLs not using permalink format (warning):\n'));
console.log(chalk.gray(' All mattermost.com links should route via https://mattermost.com/pl/\n'));
for (const item of nonPermalinkUrls) {
console.log(chalk.yellow(` ${item.url}`));
console.log(chalk.gray(` Found in:`));
for (const file of item.files) {
console.log(chalk.gray(` - ${file}`));
}
console.log();
}
}
if (broken.length === 0 && nonPermalinkUrls.length === 0) {
console.log(chalk.green.bold(`✓ All URLs are valid and accessible\n`));
}
return broken.length > 0 ? 1 : 0;
}
function generateMarkdownSummary(results, urlMap, nonPermalinkUrls) {
const broken = results.filter((r) => !r.ok);
const working = results.filter((r) => r.ok);
const lines = [];
lines.push('## External Link Check Results\n');
if (broken.length === 0 && nonPermalinkUrls.length === 0) {
lines.push(`✅ **All ${working.length} mattermost.com URLs are valid and accessible**\n`);
return lines.join('\n');
}
lines.push(`| Status | Count |`);
lines.push(`|--------|-------|`);
lines.push(`| ✅ Working | ${working.length} |`);
if (broken.length > 0) {
lines.push(`| ❌ Broken | ${broken.length} |`);
}
if (nonPermalinkUrls.length > 0) {
lines.push(`| ⚠️ Missing /pl/ prefix (warning) | ${nonPermalinkUrls.length} |`);
}
lines.push('');
if (broken.length > 0) {
lines.push('### Broken URLs\n');
lines.push('| URL | Status | Files |');
lines.push('|-----|--------|-------|');
for (const result of broken) {
const statusText = result.error ? `Error: ${result.error}` : `HTTP ${result.status}`;
const files = urlMap.get(result.url).map((f) => `\`${f}\``).join(', ');
lines.push(`| ${result.url} | ${statusText} | ${files} |`);
}
lines.push('');
}
if (nonPermalinkUrls.length > 0) {
lines.push('### ⚠️ URLs Missing Permalink Format (warning)\n');
lines.push('> All mattermost.com links should route via `https://mattermost.com/pl/`\n');
lines.push('<details>\n<summary>Show URLs</summary>\n');
lines.push('| URL | Files |');
lines.push('|-----|-------|');
for (const item of nonPermalinkUrls) {
const files = item.files.map((f) => `\`${f}\``).join(', ');
lines.push(`| ${item.url} | ${files} |`);
}
lines.push('\n</details>');
lines.push('');
}
return lines.join('\n');
}
async function main() {
const args = process.argv.slice(2);
const includeTests = args.includes('--include-tests');
const jsonOutput = args.includes('--json');
const markdownOutput = args.includes('--markdown');
const rootDir = process.cwd();
if (!markdownOutput) {
console.log(chalk.inverse.bold(' Checking mattermost.com links in webapp... ') + '\n');
if (includeTests) {
console.log(chalk.yellow('Including test files in scan\n'));
}
}
const urlMap = findAllMattermostUrls(rootDir, !includeTests);
if (!markdownOutput) {
console.log(`Found ${chalk.bold(urlMap.size)} unique mattermost.com URLs\n`);
}
if (urlMap.size === 0) {
if (markdownOutput) {
console.log('## External Link Check Results\n\n⚠ No URLs found to check');
} else {
console.log(chalk.yellow('No URLs found to check'));
}
return 0;
}
const nonPermalinkUrls = findNonPermalinkUrls(urlMap);
const results = await checkUrls(urlMap, 5, markdownOutput);
const brokenCount = results.filter((r) => !r.ok).length;
if (markdownOutput) {
console.log(generateMarkdownSummary(results, urlMap, nonPermalinkUrls));
return brokenCount > 0 ? 1 : 0;
}
if (jsonOutput) {
const output = {
total: results.length,
working: results.filter((r) => r.ok).length,
broken: results.filter((r) => !r.ok).map((r) => ({
url: r.url,
status: r.status,
error: r.error,
files: urlMap.get(r.url),
})),
nonPermalink: nonPermalinkUrls,
};
console.log(JSON.stringify(output, null, 2));
return brokenCount > 0 ? 1 : 0;
}
return printResults(results, urlMap, nonPermalinkUrls);
}
main().then((exitCode) => {
process.exitCode = exitCode;
}).catch((error) => {
console.error(chalk.red('Error:'), error);
process.exitCode = 1;
});