pFad - Phone/Frame/Anonymizer/Declutterfier! Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/strands-agents/docs/commit/dc6502e6380cc760d1a9a811a44159a13f399331

fix: Inline link detection (#592) · strands-agents/docs@dc6502e · GitHub
Skip to content

Commit dc6502e

Browse files
authored
fix: Inline link detection (#592)
Co-authored-by: Mackenzie Zastrow <zastrowm@users.noreply.github.com>
1 parent fd67481 commit dc6502e

File tree

6 files changed

+333
-4
lines changed

6 files changed

+333
-4
lines changed

CMS-README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,3 +709,7 @@ Type declarations in `src/types/turndown-plugin-gfm.d.ts` (no @types package ava
709709
### `astro-broken-links-checker`
710710

711711
This package is pinned to an exact version (`1.0.6`) rather than using a semver range. It's a low-popularity package, so we avoid automatic updates to prevent potentially pulling in malicious or breaking changes without an explicit review. Before upgrading, manually inspect the changelog and diff on the package's repository.
712+
713+
**Known bug:** The upstream plugin does not account for Astro's `base` path configuration, causing it to incorrectly flag all internal links as broken when the site is deployed under a sub-path. See [imazen/astro-broken-link-checker#16](https://github.com/imazen/astro-broken-link-checker/issues/16).
714+
715+
**Local fix:** Rather than waiting for an upstream fix, the plugin source has been inlined into `scripts/astro-broken-links-checker-index.js` and `scripts/astro-broken-links-checker-check-links.js`. The fix captures `config.base` in the `astro:config:setup` hook and strips the base prefix from internal links before resolving them against the `dist/` directory. `astro.config.mjs` imports from the local copy instead of the npm package.

astro.config.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import { loadSidebarFromMkdocs } from "./src/sidebar.ts"
88
import AutoImport from './src/plugins/astro-auto-import.ts'
99
import astroExpressiveCode from "astro-expressive-code"
1010
import mdx from '@astrojs/mdx';
11-
import astroBrokenLinksChecker from 'astro-broken-links-checker';
11+
import astroBrokenLinksChecker from './scripts/astro-broken-links-checker-index.js';
1212

1313
// Generate sidebar from mkdocs nav (validates against existing content files)
1414
// Top-level groups will be rendered as tabs by the custom Sidebar component

package.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,11 @@
3232
"@strands-agents/sdk": "github:strands-agents/sdk-typescript",
3333
"@types/express": "^5.0.5",
3434
"@types/turndown": "^5.0.6",
35-
"astro-broken-links-checker": "1.0.6",
3635
"express": "^5.1.0",
36+
"fast-glob": "^3.3.3",
37+
"node-fetch": "^3.3.2",
38+
"node-html-parser": "^7.1.0",
39+
"p-limit": "^7.3.0",
3740
"prettier": "^3.6.2",
3841
"turndown": "^7.2.2",
3942
"turndown-plugin-gfm": "^1.0.2",
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
/**
2+
* Inlined from astro-broken-links-checker v1.0.6
3+
* Original source: https://github.com/imazen/astro-broken-link-checker
4+
* License: Apache-2.0
5+
* Author: Lilith River
6+
*
7+
* This file is a local copy of check-links.js from the plugin, modified to
8+
* allow custom behavior adjustments without forking the upstream package.
9+
*/
10+
import {parse} from 'node-html-parser';
11+
import fs from 'fs';
12+
import fetch from 'node-fetch';
13+
import {URL, fileURLToPath} from 'url';
14+
import path from 'path';
15+
import pLimit from 'p-limit';
16+
17+
/**
 * Checks every `<a href>` and `<img src>` found in one HTML document and records
 * the broken ones in `brokenLinksMap`.
 *
 * Internal links (resolved pathname starts with `/` and `distPath` is set) are
 * verified against files on disk; everything else is verified with an HTTP GET
 * when `checkExternalLinks` is true.
 *
 * @param {string} htmlContent - HTML source of the document to scan.
 * @param {Map<string, Set<string>>} brokenLinksMap - Broken link -> documents containing it (mutated).
 * @param {string} baseUrl - URL path of the document, used to resolve relative links.
 * @param {string} documentPath - Path of the document, used for reporting.
 * @param {Map<string, boolean>} [checkedLinks] - Cache of link -> "is valid" shared across documents (mutated).
 * @param {string} [distPath] - Build output directory; when empty no on-disk check is performed.
 * @param {Object} [astroConfigRedirects] - Redirect table; values are a destination string
 *   or an object with a `destination` property.
 * @param {{info: Function, error: Function}} logger - Logger used for fetch/URL errors.
 * @param {boolean} [checkExternalLinks] - Whether to issue HTTP requests for non-internal links.
 * @param {'ignore'|'always'|'never'} [trailingSlash] - Trailing-slash policy enforced on internal links.
 * @param {string} [basePath] - Site base path (e.g. `/docs`); stripped from internal links
 *   before they are resolved against `distPath`.
 * @returns {Promise<void>} Resolves once every link in the document has been checked.
 */
export async function checkLinksInHtml(
  htmlContent,
  brokenLinksMap,
  baseUrl,
  documentPath,
  checkedLinks = new Map(),
  distPath = '',
  astroConfigRedirects = {},
  logger,
  checkExternalLinks = true,
  trailingSlash = 'ignore',
  basePath = '',
) {
  const root = parse(htmlContent);
  const linkElements = root.querySelectorAll('a[href]');
  const links = linkElements.map((el) => el.getAttribute('href'));
  // add img src
  const imgElements = root.querySelectorAll('img[src]');
  const imgLinks = imgElements.map((el) => el.getAttribute('src'));
  links.push(...imgLinks);

  // Base path without trailing slashes, so "/docs" and "/docs/" behave the same.
  const basePrefix = String(basePath || '').replace(/\/+$/, '');

  // Removes the base prefix only when it matches a whole path segment:
  // "/docs/page" -> "/page", "/docs" -> "/", but "/docsfoo" is left untouched.
  const stripBase = (target) => {
    if (!basePrefix || typeof target !== 'string') {
      return target;
    }
    if (target === basePrefix) {
      return '/';
    }
    return target.startsWith(`${basePrefix}/`) ? target.slice(basePrefix.length) : target;
  };

  // True for site-relative paths ("/page"), false for absolute or
  // protocol-relative URLs, which must never get the base path prepended.
  const isSitePath = (target) =>
    typeof target === 'string' && target.startsWith('/') && !target.startsWith('//');

  // Redirect entries are either a plain destination or an object carrying one.
  const redirectDestination = (redirect) => (redirect.destination ? redirect.destination : redirect);

  const limit = pLimit(50); // At most 50 link checks in flight for this document

  const checkLinkPromises = links.map((link) =>
    limit(async () => {
      if (!isValidUrl(link)) {
        return;
      }

      // "//host/path" points at another host; it must not be resolved to a local pathname.
      const isProtocolRelative = link.startsWith('//');

      let absoluteLink;
      try {
        // Differentiate between absolute, protocol-relative, and (domain-)relative links
        if (/^https?:\/\//i.test(link)) {
          // Absolute URL
          absoluteLink = link;
        } else if (isProtocolRelative) {
          // The HTTP client needs an explicit scheme to fetch the URL.
          absoluteLink = `https:${link}`;
        } else {
          absoluteLink = new URL(link, "https://localhost" + baseUrl).pathname;
        }
      } catch (err) {
        // Invalid URL, skip
        logger.error(`Invalid URL in ${normalizePath(documentPath)} ${link} ${err}`);
        return;
      }

      let fetchLink = link;
      if (isProtocolRelative) {
        fetchLink = absoluteLink;
      } else if (absoluteLink.startsWith('/') && distPath) {
        fetchLink = absoluteLink;
      }

      // Strip the base path prefix from internal links so they resolve correctly
      // against the dist directory. e.g. /docs/page -> /page when base is /docs
      let fetchLinkWithoutBase = stripBase(fetchLink);

      // Redirect lookup uses the link without base prefix (redirects are defined without base)
      if (astroConfigRedirects[fetchLinkWithoutBase]) {
        fetchLinkWithoutBase = redirectDestination(astroConfigRedirects[fetchLinkWithoutBase]);
        // Only site-relative destinations live under the base path; an external
        // destination is used verbatim so it is checked over HTTP, not on disk.
        fetchLink = isSitePath(fetchLinkWithoutBase)
          ? basePrefix + fetchLinkWithoutBase
          : fetchLinkWithoutBase;
      } else if (astroConfigRedirects[fetchLink]) {
        // fallback: try with full link including base
        fetchLink = redirectDestination(astroConfigRedirects[fetchLink]);
        fetchLinkWithoutBase = stripBase(fetchLink);
      }

      if (checkedLinks.has(fetchLink)) {
        const isKnownBroken = !checkedLinks.get(fetchLink);
        if (isKnownBroken) {
          addBrokenLink(brokenLinksMap, documentPath, link, distPath);
        }
        return;
      }

      let isBroken = false;

      if (fetchLink.startsWith('/') && distPath) {
        // Internal link in build mode, check if file exists.
        // Astro's base path is part of the URL but NOT reflected in the dist
        // directory structure — files are output at the root of dist/.
        // So we strip the base prefix and resolve against distPath directly.
        const relativePath = fetchLinkWithoutBase;
        // Potential file paths to check
        const possiblePaths = [
          path.join(distPath, relativePath),
          path.join(distPath, relativePath, 'index.html'),
          path.join(distPath, `${relativePath}.html`),
        ];

        // Check if any of the possible paths exist
        if (!possiblePaths.some((p) => fs.existsSync(p))) {
          isBroken = true;
        }

        // check trailing slash is correct on internal links
        const re = /\/$|\.[a-z0-9]+$/; // match trailing slash or file extension
        if (trailingSlash === 'always' && !fetchLink.match(re)) {
          isBroken = true;
        } else if (trailingSlash === 'never' && fetchLink !== '/' && fetchLink.endsWith('/')) {
          isBroken = true;
        }
      } else {
        // External link, check via HTTP request. Retry up to 3 times on ECONNRESET
        if (checkExternalLinks) {
          let retries = 0;
          while (retries < 3) {
            try {
              const response = await fetch(fetchLink, {method: 'GET'});
              isBroken = !response.ok;
              if (isBroken) {
                logger.error(`${response.status} Error fetching ${fetchLink}`);
              }
              break;
            } catch (error) {
              isBroken = true;
              let statusCodeNumber = error.errno === 'ENOTFOUND' ? 404 : (error.errno);
              logger.error(`${statusCodeNumber} error fetching ${fetchLink}`);
              if (error.errno === 'ECONNRESET') {
                retries++;
                continue;
              }
              break;
            }
          }
        }
      }

      // Cache the link's validity
      checkedLinks.set(fetchLink, !isBroken);
      checkedLinks.set(absoluteLink, !isBroken);

      if (isBroken) {
        addBrokenLink(brokenLinksMap, documentPath, link, distPath);
      }
    })
  );

  await Promise.all(checkLinkPromises);
}
169+
170+
/**
 * Decides whether a link value should be checked at all.
 *
 * Skips empty values, same-page fragments (`#...`) and the `mailto:`, `tel:` and
 * `javascript:` schemes. Surrounding whitespace is ignored and the scheme is
 * compared case-insensitively, so ` MAILTO:me@example.com` is skipped as well.
 *
 * @param {string} url - Raw `href`/`src` attribute value.
 * @returns {boolean} `true` when the link should be checked, `false` when it is skipped.
 */
function isValidUrl(url) {
  // Anything that is not a string cannot be resolved as a link.
  if (typeof url !== 'string') {
    return false;
  }

  const candidate = url.trim().toLowerCase();
  if (candidate === '' || candidate.startsWith('#')) {
    return false;
  }

  const skippedSchemes = ['mailto:', 'tel:', 'javascript:'];
  return !skippedSchemes.some((scheme) => candidate.startsWith(scheme));
}
180+
181+
/**
 * Converts a file path or link into a normalized, extension-less URL path.
 *
 * Query strings and fragments are dropped, a leading `/` is guaranteed, a
 * trailing `/index.html` collapses to its directory (`/guide/index.html` ->
 * `/guide/`) and any other `.html` suffix is removed (`/page.html` -> `/page`).
 *
 * @param {*} p - Path-like value; it is converted with `toString()`.
 * @returns {string} The normalized path, always starting with `/`.
 */
function normalizePath(p) {
  // Remove query parameters and fragments
  let normalized = p.toString().split('?')[0].split('#')[0];

  // Ensure leading '/' first, so a root-level "index.html" is recognized by the
  // '/index.html' check below and maps to '/' rather than '/index'.
  if (!normalized.startsWith('/')) {
    normalized = `/${normalized}`;
  }

  // Remove 'index.html' (keeping the directory slash) or a '.html' suffix
  if (normalized.endsWith('/index.html')) {
    normalized = normalized.slice(0, -'index.html'.length);
  } else if (normalized.endsWith('.html')) {
    normalized = normalized.slice(0, -'.html'.length);
  }

  return normalized;
}
200+
201+
/**
 * Turns the location of a built HTML file into the normalized URL path used for
 * link resolution and reporting.
 *
 * @param {string} filePath - Path of the HTML file.
 * @param {string} [distPath] - Build output directory; when given, `filePath` is
 *   made relative to it before normalizing.
 * @returns {string} The value produced by `normalizePath` for that location.
 */
export function normalizeHtmlFilePath(filePath, distPath = '') {
  const location = distPath ? path.relative(distPath, filePath) : filePath;
  // path.relative() uses the platform separator; URL paths always use '/',
  // so convert it where the two differ (e.g. '\\' on Windows).
  const urlLike = path.sep === '/' ? location : String(location).split(path.sep).join('/');
  return normalizePath(urlLike);
}
204+
205+
/**
 * Registers `brokenLink` as broken for the given document.
 *
 * @param {Map<string, Set<string>>} brokenLinksMap - Broken link -> set of documents (mutated).
 * @param {string} documentPath - Path of the document containing the link.
 * @param {string} brokenLink - The link exactly as written in the document; used as the map key.
 * @param {string} distPath - Build output directory, forwarded to `normalizeHtmlFilePath`.
 */
function addBrokenLink(brokenLinksMap, documentPath, brokenLink, distPath) {
  // Report the document by its normalized path rather than its file location.
  const reportedDocument = normalizeHtmlFilePath(documentPath, distPath);

  // First sighting of this link: start its document set.
  if (!brokenLinksMap.has(brokenLink)) {
    brokenLinksMap.set(brokenLink, new Set([reportedDocument]));
    return;
  }

  brokenLinksMap.get(brokenLink).add(reportedDocument);
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/**
2+
* Inlined from astro-broken-links-checker v1.0.6
3+
* Original source: https://github.com/imazen/astro-broken-link-checker
4+
* License: Apache-2.0
5+
* Author: Lilith River
6+
*
7+
* This file is a local copy of index.js from the plugin, modified to
8+
* allow custom behavior adjustments without forking the upstream package.
9+
*/
10+
import {fileURLToPath} from 'url';
11+
import {join} from 'path';
12+
import fs from 'fs';
13+
import {checkLinksInHtml, normalizeHtmlFilePath} from './astro-broken-links-checker-check-links.js';
14+
import fastGlob from 'fast-glob';
15+
16+
/**
 * Astro integration that scans the built HTML pages for broken links.
 *
 * Options read by this function:
 * - `logFilePath` (string): where the report is written; defaults to `broken-links.log`.
 * - `checkExternalLinks` (boolean): forwarded to `checkLinksInHtml`.
 * - `throwError` (boolean): when truthy, the build fails if any broken link is found.
 *
 * The `astro:config:setup` hook stores `astroConfigRedirects`, `trailingSlash`
 * and `basePath` on the same `options` object for use after the build.
 *
 * @param {Object} [options] - Integration options (mutated, see above).
 * @returns {{name: string, hooks: Object}} The integration object with its hooks.
 */
export default function astroBrokenLinksChecker(options = {}) {
  const logFilePath = options.logFilePath || 'broken-links.log';
  const brokenLinksMap = new Map(); // Map of brokenLink -> Set of documents
  const checkedLinks = new Map(); // Map of link -> whether it was found valid

  return {
    name: 'astro-broken-links-checker',
    hooks: {
      'astro:config:setup': async ({config}) => {
        // save the redirects to the options
        options.astroConfigRedirects = config.redirects;

        // use astro trailingSlash setting, falling back to astro default of 'ignore'
        options.trailingSlash = config.trailingSlash || 'ignore';

        // capture base path so internal links can be resolved correctly;
        // normalize to '' for the root, otherwise exactly one leading slash
        // and no trailing slash ('docs/' and '/docs/' both become '/docs')
        const rawBase = config.base || '/';
        const trimmedBase = rawBase.replace(/\/+$/, '');
        if (trimmedBase === '') {
          options.basePath = '';
        } else {
          options.basePath = trimmedBase.startsWith('/') ? trimmedBase : `/${trimmedBase}`;
        }
      },

      'astro:build:done': async ({dir, logger}) => {
        const astroConfigRedirects = options.astroConfigRedirects;
        const distPath = fileURLToPath(dir);
        const htmlFiles = await fastGlob('**/*.html', {cwd: distPath});
        logger.info(`Checking ${htmlFiles.length} html pages for broken links`);
        // start time
        const startTime = Date.now();
        const checkHtmlPromises = htmlFiles.map(async (htmlFile) => {
          const absoluteHtmlFilePath = join(distPath, htmlFile);
          const htmlContent = fs.readFileSync(absoluteHtmlFilePath, 'utf8');
          // URL path of this page, used to resolve its relative links
          const baseUrl = normalizeHtmlFilePath(absoluteHtmlFilePath, distPath);
          await checkLinksInHtml(
            htmlContent,
            brokenLinksMap,
            baseUrl,
            absoluteHtmlFilePath, // Document path
            checkedLinks,
            distPath,
            astroConfigRedirects,
            logger,
            options.checkExternalLinks,
            options.trailingSlash,
            options.basePath || '',
          );
        });

        await Promise.all(checkHtmlPromises);
        logBrokenLinks(brokenLinksMap, logFilePath, logger);

        // end time
        const endTime = Date.now();
        logger.info(`Time to check links: ${endTime - startTime} ms`);

        // stop the build if we have broken links and the option is set
        if (options.throwError && brokenLinksMap.size > 0) {
          throw new Error(`Broken links detected. Check the log file: ${logFilePath}`);
        }
      },
    },
  };
}
80+
81+
/**
 * Reports the collected broken links.
 *
 * When there are broken links, a report listing each link and the documents it
 * was found in is logged and, if `logFilePath` is set, also written to that file.
 * When there are none, a stale report file at `logFilePath` is deleted.
 *
 * @param {Map<string, Set<string>>} brokenLinksMap - Broken link -> documents containing it.
 * @param {string} logFilePath - Destination of the report file; falsy to only log.
 * @param {{info: Function}} logger - Logger receiving the report and status messages.
 */
function logBrokenLinks(brokenLinksMap, logFilePath, logger) {
  if (brokenLinksMap.size > 0) {
    let logData = '';
    for (const [brokenLink, documentsSet] of brokenLinksMap.entries()) {
      logData += `Broken link: ${brokenLink}\n Found in:\n`;
      for (const doc of documentsSet) {
        logData += ` - ${doc}\n`;
      }
    }
    logData = logData.trim();
    if (logFilePath) {
      fs.writeFileSync(logFilePath, logData, 'utf8');
      logger.info(`Broken links have been logged to ${logFilePath}`);
    }
    logger.info(logData);
  } else {
    logger.info('No broken links detected.');
    if (fs.existsSync(logFilePath)) {
      // The path is interpolated into the message rather than passed as an extra
      // argument, so it is part of the text the logger receives.
      logger.info(`Removing old log file: ${logFilePath}`);
      fs.rmSync(logFilePath);
    }
  }
}

src/util/links.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ export function resolveHref(
300300
const knownStaticFiles = ['llms.txt', 'llms-full.txt']
301301
for (const file of knownStaticFiles) {
302302
if (href === file || href === `/${file}` || href.endsWith(`/${file}`)) {
303-
return { resolvedHref: pathWithBase(`/${file}`), found: true }
303+
return { resolvedHref: `/${file}`, found: true }
304304
}
305305
}
306306

@@ -313,7 +313,7 @@ export function resolveHref(
313313
const slugPart = pathOnly.replace(/^\//, '').replace(/\/$/, '')
314314
const found = docSlugs.has(slugPart)
315315
// Apply base path for @api links since they resolve to absolute paths
316-
return { resolvedHref: pathWithBase(resolved), found }
316+
return { resolvedHref: resolved, found }
317317
}
318318

319319
if (!isRelativeLink(href)) {

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad © 2024 Your Company Name. All rights reserved.





Check this box to remove all script contents from the fetched content.



Check this box to remove all images from the fetched content.


Check this box to remove all CSS styles from the fetched content.


Check this box to keep images inefficiently compressed and original size.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy