Skip to content

Commit ced3c88

Browse files
committed
fix(site): normalize generated doc links
1 parent b522de3 commit ced3c88

4 files changed

Lines changed: 219 additions & 1 deletion

File tree

site/astro.config.mjs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ function normalizeGuideMarkdown() {
4949
}
5050

5151
function repoUrl(url) {
52+
const renderedDocsUrl = renderedDocsRepoUrl(url);
53+
if (renderedDocsUrl) {
54+
return renderedDocsUrl;
55+
}
56+
5257
if (
5358
url.startsWith("http://") ||
5459
url.startsWith("https://") ||
@@ -77,6 +82,31 @@ function repoUrl(url) {
7782
return null;
7883
}
7984

85+
/**
 * Translates a rendered `/docs/...` href into the URL of the corresponding
 * file on the `main` branch of the GitHub repository.
 *
 * @param {string} url - Link target to inspect.
 * @returns {string | null} The GitHub blob URL, or `null` when `url` is not
 *   under `/docs/` or matches none of the known repo locations.
 */
function renderedDocsRepoUrl(url) {
  const docsRoot = "/docs/";
  const blobRoot = "https://github.com/everruns/bashkit/blob/main/";

  if (!url.startsWith(docsRoot)) {
    return null;
  }

  const relative = url.slice(docsRoot.length);

  // Only an exact match on these two file names maps to the repo root.
  if (["README.md", "SECURITY.md"].includes(relative)) {
    return `${blobRoot}${relative}`;
  }

  // Markers are tried in order; everything from the first marker found is
  // kept, so anything that precedes it in the path is dropped.
  for (const marker of ["specs/", "crates/bashkit/docs/"]) {
    const at = relative.indexOf(marker);
    if (at !== -1) {
      return `${blobRoot}${relative.slice(at)}`;
    }
  }

  return null;
}
109+
80110
function rewriteRenderedLinks() {
81111
return (tree) => {
82112
visit(tree, (node) => {

site/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"scripts": {
1111
"dev": "astro dev",
1212
"build": "astro build",
13-
"postbuild": "node scripts/verify-doc-routes.mjs && node scripts/verify-sitemap.mjs && node scripts/verify-robots.mjs && node scripts/verify-agent-skills.mjs && node scripts/verify-link-headers.mjs",
13+
"postbuild": "node scripts/normalize-generated-html.mjs && node scripts/verify-doc-routes.mjs && node scripts/verify-public-links.mjs && node scripts/verify-sitemap.mjs && node scripts/verify-robots.mjs && node scripts/verify-agent-skills.mjs && node scripts/verify-link-headers.mjs",
1414
"preview": "wrangler dev",
1515
"deploy": "npm run build && wrangler deploy",
1616
"check": "astro check",
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
// Decision: rustdoc-compatible examples use `# ` setup lines so doctests can
2+
// compile while docs hide boilerplate. Astro/Shiki renders those markers, so
3+
// normalize generated HTML before deploy.
4+
import { readdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
5+
import path from "node:path";
6+
import { fileURLToPath } from "node:url";
7+
8+
// The build output lives in `dist/` next to this script's parent directory.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const siteRoot = path.resolve(scriptDir, "..");
const distRoot = path.join(siteRoot, "dist");

// Run-wide tallies. `hiddenLines` is incremented inside normalizeRustdocCode
// each time it drops a line.
let changedFiles = 0;
let hiddenLines = 0;
let remainingHiddenLines = 0;

for (const filePath of htmlFiles(distRoot)) {
  const original = readFileSync(filePath, "utf8");
  const rewritten = normalizeRustdocHtml(original);

  // Re-scan the rewritten markup so leftover hidden lines are counted.
  remainingHiddenLines += countRustdocHiddenLines(rewritten);

  if (rewritten === original) {
    continue;
  }

  changedFiles += 1;
  writeFileSync(filePath, rewritten);
}

// Fail the build when any hidden-line marker survived normalization.
if (remainingHiddenLines > 0) {
  throw new Error(`Generated HTML still contains ${remainingHiddenLines} rustdoc hidden line(s).`);
}

console.log(
  `Normalized ${hiddenLines} rustdoc hidden line(s) in ${changedFiles} generated HTML file(s).`,
);
33+
34+
/**
 * Recursively walks `dir` and yields the path of every entry whose name ends
 * in `.html`. Directories are descended into, never yielded.
 *
 * @param {string} dir - Directory to scan.
 * @yields {string} Path of an HTML file, built by joining onto `dir`.
 */
function* htmlFiles(dir) {
  for (const entry of readdirSync(dir)) {
    const entryPath = path.join(dir, entry);

    if (statSync(entryPath).isDirectory()) {
      yield* htmlFiles(entryPath);
    } else if (entry.endsWith(".html")) {
      yield entryPath;
    }
  }
}
49+
50+
/**
 * Rewrites the contents of every `<pre data-language="rust|rs"><code>` block
 * in `html` by passing it through normalizeRustdocCode. Markup outside those
 * blocks, and the opening and closing tags themselves, are left unchanged.
 *
 * @param {string} html - Full HTML document text.
 * @returns {string} The document with each Rust code block normalized.
 */
function normalizeRustdocHtml(html) {
  const rustBlock =
    /(<pre\b[^>]*\bdata-language="(?:rust|rs)"[^>]*><code>)([\s\S]*?)(<\/code><\/pre>)/g;

  // Groups: opening tags, highlighted code body, closing tags.
  const rewriteBlock = (_whole, opening, body, closing) =>
    `${opening}${normalizeRustdocCode(body)}${closing}`;

  return html.replace(rustBlock, rewriteBlock);
}
56+
57+
/**
 * Applies rustdoc hidden-line rules to the highlighted body of one code block.
 *
 * The body is split at newlines that precede `<span class="line">`. For each
 * line, based on its leading visible text:
 *   - `##...` (after optional indent): kept, with one `#` removed;
 *   - `#` followed by whitespace or nothing: dropped, and the module-level
 *     `hiddenLines` counter is incremented;
 *   - anything else: kept as-is.
 *
 * @param {string} code - Inner HTML of a `<code>` element.
 * @returns {string} The remaining lines joined with `\n`.
 */
function normalizeRustdocCode(code) {
  const output = [];

  code.split(/\n(?=<span class="line">)/).forEach((lineHtml) => {
    const text = visiblePrefix(lineHtml);
    const escapedMatch = /^(\s*)##/.exec(text);

    if (escapedMatch !== null) {
      // The captured indent tells the helper where the `#` to drop sits.
      output.push(removeHashAfterIndent(lineHtml, escapedMatch[1].length));
    } else if (/^\s*#(?:\s|$)/.test(text)) {
      hiddenLines += 1;
    } else {
      output.push(lineHtml);
    }
  });

  return output.join("\n");
}
79+
80+
/**
 * Counts the lines inside Rust (`rust`/`rs`) code blocks of `html` whose
 * leading visible text is a rustdoc hidden-line marker: `#` followed by
 * whitespace or by nothing.
 *
 * @param {string} html - Full HTML document text.
 * @returns {number} Number of hidden-marker lines found.
 */
function countRustdocHiddenLines(html) {
  const rustBlocks = html.matchAll(
    /<pre\b[^>]*\bdata-language="(?:rust|rs)"[^>]*><code>([\s\S]*?)<\/code><\/pre>/g,
  );
  let total = 0;

  for (const [, code] of rustBlocks) {
    const flagged = code
      .split(/\n(?=<span class="line">)/)
      .filter((lineHtml) => /^\s*#(?:\s|$)/.test(visiblePrefix(lineHtml)));
    total += flagged.length;
  }

  return total;
}
96+
97+
/**
 * Returns the leading visible text of one highlighted line, with enough
 * characters to tell a rustdoc hidden line (`# ...` or a bare `#`) from an
 * escaped one (`##...`) or ordinary code (`#[attr]`, `let x`, ...).
 *
 * Text is gathered from the character runs that follow each `>` in `line`,
 * so tag names and attributes are never included.
 *
 * Accumulation stops at the first chunk that contributes non-whitespace text,
 * with one exception: while the visible text is exactly `#`, the next chunk is
 * still needed. A highlighter may emit `#` in its own span, and classifying on
 * that chunk alone would mistake `#[derive(..)]` or a split `##` for a hidden
 * line.
 *
 * @param {string} line - HTML of a single code line.
 * @returns {string} Leading text including indentation. A prefix whose
 *   visible part is just `#` is returned only when no further text follows.
 */
function visiblePrefix(line) {
  let prefix = "";

  for (const match of line.matchAll(/>([^<]*)/g)) {
    prefix += match[1];
    const visible = prefix.trimStart();

    // Nothing visible yet, or a lone `#` that the next character decides.
    if (visible.length === 0 || visible === "#") {
      continue;
    }

    return prefix;
  }

  return prefix;
}
111+
112+
/**
 * Deletes exactly one character from the text content of `html`: the one at
 * offset `indentLength`, counted across the text runs that follow each `>`.
 * Tags and attributes are never modified or counted.
 *
 * @param {string} html - HTML of a single code line.
 * @param {number} indentLength - Count of text characters to skip first.
 * @returns {string} `html` with that character removed, or unchanged when the
 *   text content is not longer than `indentLength`.
 */
function removeHashAfterIndent(html, indentLength) {
  let toSkip = indentLength;
  let done = false;

  const dropOneChar = (whole, bracket, text) => {
    if (done) {
      return whole;
    }

    // This run lies entirely inside the skipped prefix; consume it and move on.
    if (text.length <= toSkip) {
      toSkip -= text.length;
      return whole;
    }

    done = true;
    const before = text.slice(0, toSkip);
    const after = text.slice(toSkip + 1);
    return `${bracket}${before}${after}`;
  };

  return html.replace(/(>)([^<]*)/g, dropOneChar);
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Decision: generated public HTML must not link to repo-internal markdown paths.
2+
// Internal docs/specs files are valid in GitHub, but bashkit.sh does not serve
3+
// raw .md routes, so local markdown hrefs become crawler-visible 404s.
4+
import { readdirSync, readFileSync, statSync } from "node:fs";
5+
import path from "node:path";
6+
import { fileURLToPath } from "node:url";
7+
8+
// Absolute links under this origin count as local (see isLocalMarkdownHref).
const SITE_URL = "https://bashkit.sh";

// The build output lives in `dist/` next to this script's parent directory.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const siteRoot = path.resolve(scriptDir, "..");
const distRoot = path.join(siteRoot, "dist");

// Filled by collectHtmlFiles with `{ filePath, href }` records.
const localMarkdownLinks = [];

collectHtmlFiles(distRoot);

if (localMarkdownLinks.length > 0) {
  // One "<file relative to dist> -> <href>" line per offending link.
  const reportLines = [];
  for (const { filePath, href } of localMarkdownLinks) {
    reportLines.push(`${path.relative(distRoot, filePath)} -> ${href}`);
  }
  const details = reportLines.join("\n");
  throw new Error(`Generated HTML contains local markdown links:\n${details}`);
}

console.log("Verified generated HTML has no local markdown links.");
24+
25+
/**
 * Recursively scans `dir` for `.html` files. For each double-quoted `href`
 * ending in `.md` (optionally followed by a `#fragment`) that
 * isLocalMarkdownHref accepts, a `{ filePath, href }` record is appended to
 * the module-level `localMarkdownLinks` array.
 *
 * @param {string} dir - Directory to scan.
 * @returns {void}
 */
function collectHtmlFiles(dir) {
  for (const entry of readdirSync(dir)) {
    const entryPath = path.join(dir, entry);

    if (statSync(entryPath).isDirectory()) {
      collectHtmlFiles(entryPath);
    } else if (entry.endsWith(".html")) {
      const markup = readFileSync(entryPath, "utf8");

      // Capture group 1 is the href value, including any fragment.
      for (const [, target] of markup.matchAll(/\shref="([^"]+\.md(?:#[^"]*)?)"/g)) {
        if (isLocalMarkdownHref(target)) {
          localMarkdownLinks.push({ filePath: entryPath, href: target });
        }
      }
    }
  }
}
47+
48+
/**
 * Decides whether a markdown href points at this site rather than elsewhere.
 *
 * An href is local when it begins with `SITE_URL` followed by `/`, or when it
 * begins with none of `http://`, `https://`, `mailto:` or `#`.
 *
 * @param {string} href - Raw href attribute value.
 * @returns {boolean} `true` when the link is local.
 */
function isLocalMarkdownHref(href) {
  // Absolute links back to the site itself are local despite their scheme.
  if (href.startsWith(`${SITE_URL}/`)) {
    return true;
  }

  const nonLocalPrefixes = ["http://", "https://", "mailto:", "#"];
  return !nonLocalPrefixes.some((prefix) => href.startsWith(prefix));
}

0 commit comments

Comments
 (0)