// Origin: patch ced3c885 "fix(site): normalize generated doc links"
// (site/astro.config.mjs). In that patch repoUrl() calls renderedDocsRepoUrl()
// first and returns its result whenever it is non-null, and the package.json
// "postbuild" script is extended to run scripts/normalize-generated-html.mjs
// first and scripts/verify-public-links.mjs right after verify-doc-routes.mjs.

/**
 * Maps a rendered `/docs/...` URL that actually refers to a repository file
 * onto that file's GitHub blob URL.
 *
 * Recognised targets, checked in this order:
 *   1. `/docs/README.md` and `/docs/SECURITY.md` (exact match),
 *   2. any path containing a `specs/` directory,
 *   3. any path containing `crates/bashkit/docs/`.
 * For 2 and 3 everything from that directory onwards (including any
 * `#fragment`) is appended to the blob base.
 *
 * @param {string} url - Link target as it appears in the rendered page.
 * @returns {string | null} GitHub blob URL, or `null` when `url` is not one of
 *   the recognised repository paths.
 */
function renderedDocsRepoUrl(url) {
  const prefix = "/docs/";
  const blobBase = "https://github.com/everruns/bashkit/blob/main/";
  if (!url.startsWith(prefix)) {
    return null;
  }

  const docsPath = url.slice(prefix.length);
  if (docsPath === "README.md" || docsPath === "SECURITY.md") {
    return `${blobBase}${docsPath}`;
  }

  // Only accept the directory at a path-segment boundary. A bare substring
  // search would also hit names such as `openspecs/` and produce a GitHub URL
  // for a file that does not exist.
  const segmentStart = (dir) => {
    if (docsPath.startsWith(dir)) {
      return 0;
    }
    const at = docsPath.indexOf(`/${dir}`);
    return at >= 0 ? at + 1 : -1;
  };

  for (const dir of ["specs/", "crates/bashkit/docs/"]) {
    const start = segmentStart(dir);
    if (start >= 0) {
      return `${blobBase}${docsPath.slice(start)}`;
    }
  }

  return null;
}

// Decision: rustdoc-compatible examples use `# ` setup lines so doctests can
// compile while docs hide boilerplate. Astro/Shiki renders those markers, so
// normalize generated HTML before deploy.
import { readdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";

const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const siteRoot = path.resolve(scriptDir, "..");
const distRoot = path.join(siteRoot, "dist");

// Running totals across every generated HTML file.
let changedFiles = 0;
let hiddenLines = 0;
// Lines that legitimately still read `# ...` after normalization because the
// author wrote the `##` escape. They must not trip the leftover check below.
let literalHashLines = 0;
let remainingHiddenLines = 0;

for (const filePath of htmlFiles(distRoot)) {
  const html = readFileSync(filePath, "utf8");
  const literalsBefore = literalHashLines;
  const normalized = normalizeRustdocHtml(html);
  const expectedLiterals = literalHashLines - literalsBefore;
  remainingHiddenLines += Math.max(
    0,
    countRustdocHiddenLines(normalized) - expectedLiterals,
  );

  // Only touch files whose content actually changed.
  if (normalized !== html) {
    changedFiles += 1;
    writeFileSync(filePath, normalized);
  }
}

if (remainingHiddenLines > 0) {
  throw new Error(`Generated HTML still contains ${remainingHiddenLines} rustdoc hidden line(s).`);
}

console.log(
  `Normalized ${hiddenLines} rustdoc hidden line(s) in ${changedFiles} generated HTML file(s).`,
);

/**
 * Recursively yields the path of every `.html` file below `dir`.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Generator<string>} Paths of the HTML files found.
 */
function* htmlFiles(dir) {
  for (const name of readdirSync(dir)) {
    const filePath = path.join(dir, name);

    if (statSync(filePath).isDirectory()) {
      yield* htmlFiles(filePath);
      continue;
    }

    if (name.endsWith(".html")) {
      yield filePath;
    }
  }
}

/**
 * Tells whether the visible text of a line marks it as a rustdoc hidden line:
 * optional indentation, a single `#`, then whitespace or end of line.
 *
 * @param {string} text - Visible (tag-free) text at the start of a line.
 * @returns {boolean} True for `# ...` / bare `#`, false for `#[attr]`, `##`, etc.
 */
function isHiddenMarker(text) {
  return /^\s*#(?:\s|$)/.test(text);
}

/**
 * Rewrites every highlighted Rust code block in `html`, dropping rustdoc
 * hidden lines and unescaping `##` lines.
 *
 * NOTE(review): the `<pre[^>]*` anchor is reconstructed; the reviewed text had
 * lost the HTML-like fragments of this pattern. The opening `<code>` tag is
 * kept in the first capture so that removing a hidden *first* line cannot
 * remove the tag with it. Confirm against the rendered markup.
 *
 * @param {string} html - Full document markup.
 * @returns {string} Markup with normalized Rust code blocks.
 */
function normalizeRustdocHtml(html) {
  return html.replace(
    /(<pre[^>]*\bdata-language="(?:rust|rs)"[^>]*>\s*<code[^>]*>)([\s\S]*?)(<\/code><\/pre>)/g,
    (_, open, code, close) => `${open}${normalizeRustdocCode(code)}${close}`,
  );
}

/**
 * Normalizes the inner markup of one Rust code block, line by line.
 *
 * - `##...` lines lose one `#` (rustdoc's escape for a literal leading `#`).
 * - `# ...` and bare `#` lines are removed and counted in `hiddenLines`.
 * - Everything else is kept untouched.
 *
 * @param {string} code - Markup between the opening and closing code tags.
 * @returns {string} The kept lines joined with `\n`.
 */
function normalizeRustdocCode(code) {
  const kept = [];

  // The reviewed pattern split on every newline; presumably each rendered
  // source line sits on its own markup line.
  for (const line of code.split("\n")) {
    const text = visiblePrefix(line);
    const escaped = /^(\s*)##/.exec(text);
    if (escaped) {
      const unescaped = removeHashAfterIndent(line, escaped[1].length);
      // `## x` becomes `# x`, which looks exactly like a hidden line.
      // Remember it so the leftover check does not report it as a failure.
      if (isHiddenMarker(visiblePrefix(unescaped))) {
        literalHashLines += 1;
      }
      kept.push(unescaped);
      continue;
    }

    if (isHiddenMarker(text)) {
      hiddenLines += 1;
      continue;
    }

    kept.push(line);
  }

  return kept.join("\n");
}

/**
 * Counts lines inside Rust code blocks of `html` that still look like rustdoc
 * hidden lines. Unescaped `##` lines are included in this count; the caller
 * subtracts the ones it expects.
 *
 * @param {string} html - Full document markup.
 * @returns {number} Number of lines whose visible text reads `# ...` or `#`.
 */
function countRustdocHiddenLines(html) {
  const rustBlock =
    /<pre[^>]*\bdata-language="(?:rust|rs)"[^>]*>\s*<code[^>]*>([\s\S]*?)<\/code><\/pre>/g;
  let count = 0;

  for (const [, code] of html.matchAll(rustBlock)) {
    for (const line of code.split("\n")) {
      if (isHiddenMarker(visiblePrefix(line))) {
        count += 1;
      }
    }
  }

  return count;
}

/**
 * Collects the visible text at the start of a markup line: the text nodes that
 * follow each `>`, concatenated until there is enough to classify the line.
 *
 * Reading stops once the text starts with `##`, or holds a first character
 * that is not `#`, or holds `#` plus at least one more character. A lone `#`
 * is not enough: the highlighter may put `#` and the following `[` (or a
 * second `#`) in separate elements, and stopping there would misread
 * `#[derive(..)]` as a hidden line.
 *
 * @param {string} line - Markup of a single rendered line.
 * @returns {string} Leading visible text (may include indentation).
 */
function visiblePrefix(line) {
  let prefix = "";

  for (const match of line.matchAll(/>([^<]*)/g)) {
    prefix += match[1];
    const visible = prefix.trimStart();

    if (visible.startsWith("##")) {
      return prefix;
    }
    if (visible.length >= 2 || (visible.length === 1 && visible !== "#")) {
      return prefix;
    }
  }

  return prefix;
}

/**
 * Removes the single visible character located `indentLength` visible
 * characters into `html`, leaving all tags intact. Used to turn `##` into `#`.
 *
 * @param {string} html - Markup of a single rendered line.
 * @param {number} indentLength - Count of visible characters to skip first.
 * @returns {string} Markup with that one character removed.
 */
function removeHashAfterIndent(html, indentLength) {
  let remaining = indentLength;
  let removed = false;

  return html.replace(/(>)([^<]*)/g, (match, close, text) => {
    if (removed) {
      return match;
    }

    // This text node lies entirely inside the indentation: skip past it.
    if (remaining >= text.length) {
      remaining -= text.length;
      return match;
    }

    removed = true;
    return `${close}${text.slice(0, remaining)}${text.slice(remaining + 1)}`;
  });
}

// Decision: generated public HTML must not link to repo-internal markdown paths.
// Internal docs/specs files are valid in GitHub, but bashkit.sh does not serve
// raw .md routes, so local markdown hrefs become crawler-visible 404s.
import { readdirSync, readFileSync, statSync } from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";

const SITE_URL = "https://bashkit.sh";
const SITE_ORIGIN = new URL(SITE_URL).origin;
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const siteRoot = path.resolve(scriptDir, "..");
const distRoot = path.join(siteRoot, "dist");
// Every offending link found, as { filePath, href }.
const localMarkdownLinks = [];

collectHtmlFiles(distRoot);

if (localMarkdownLinks.length > 0) {
  const details = localMarkdownLinks
    .map(({ filePath, href }) => `${path.relative(distRoot, filePath)} -> ${href}`)
    .join("\n");
  throw new Error(`Generated HTML contains local markdown links:\n${details}`);
}

console.log("Verified generated HTML has no local markdown links.");

/**
 * Walks `dir` recursively and records, in `localMarkdownLinks`, every
 * double-quoted `href` in a `.html` file that points at a `.md` path on this
 * site.
 *
 * @param {string} dir - Directory to scan.
 * @returns {void}
 */
function collectHtmlFiles(dir) {
  for (const name of readdirSync(dir)) {
    const filePath = path.join(dir, name);

    if (statSync(filePath).isDirectory()) {
      collectHtmlFiles(filePath);
      continue;
    }

    if (!name.endsWith(".html")) {
      continue;
    }

    const html = readFileSync(filePath, "utf8");
    // Target ends in `.md`, optionally followed by a query string and/or fragment.
    for (const [, href] of html.matchAll(/\shref="([^"]+\.md(?:[?#][^"]*)?)"/g)) {
      if (isLocalMarkdownHref(href)) {
        localMarkdownLinks.push({ filePath, href });
      }
    }
  }
}

/**
 * Decides whether `href` resolves to this site rather than an external host.
 *
 * The href is resolved against `SITE_URL`, so relative and root-relative
 * paths count as local, as do absolute URLs on the site origin. Other hosts,
 * protocol-relative URLs to other hosts, and non-http schemes such as
 * `mailto:` do not. Fragment-only hrefs are never local markdown links.
 *
 * @param {string} href - Raw attribute value.
 * @returns {boolean} True when the link targets this site.
 */
function isLocalMarkdownHref(href) {
  if (href.startsWith("#")) {
    return false;
  }

  let resolved;
  try {
    resolved = new URL(href, `${SITE_URL}/`);
  } catch {
    // Relative paths always resolve against the base, so only a malformed
    // absolute URL lands here. That is not a route this site would serve.
    return false;
  }

  return resolved.origin === SITE_ORIGIN;
}