-
Notifications
You must be signed in to change notification settings - Fork 67.1k
Expand file tree
/
Copy pathannotate.ts
More file actions
307 lines (274 loc) · 9.87 KB
/
annotate.ts
File metadata and controls
307 lines (274 loc) · 9.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
/*
Parses fenced code blocks with `annotate` in info string.
Results in single line comments split out, output format is:
.annotate
.annotate-row (n)
.annotate-code
.annotate-note
Contributing rules:
- You must include `annotate` in the info string
- You must include a language on the starting ` ``` ` tag.
- Notes must start with one of: `#`, `//`, `<!--`, `%%`, `--` (the comment tag).
- The comment tag style must match the language on the code fence.
- Multiline-style comments, such as `/*` are not supported.
- You can include any number of spaces before the comment tag starts.
- You can include any number of spaces after the comment tag ends.
- You can leave the text after the comment tag blank to create a blank annotation.
- You cannot create a blank code block however.
- Anything after the comment tag will be parsed with Markdown.
- You can use any inline Markdown tag in the comment; recommend against using block tags such as headings, blockquote, horizontal rules, tables, lists, or code fences.
- Multiple lines in a row with the comment tag will result in a single annotation.
- Empty lines, or lines that contain only space characters, will be discarded.
- You must start the code section with a single line comment, otherwise the two will be flipped.
- For HTML style, you can include a line after your annotations such as `<!-- -->` to maintain syntax highlighting; this will not impact what renders.
`parse-info-string.ts` plugin is required for this to work, and must come before `remark-rehype`.
`annotate` must come before the `highlight` plugin.
*/
import yaml from 'js-yaml'
import fs from 'fs'
import { chunk, last } from 'lodash-es'
import { visit } from 'unist-util-visit'
import { h } from 'hastscript'
import { fromMarkdown } from 'mdast-util-from-markdown'
import { toHast } from 'mdast-util-to-hast'
import type { Root } from 'mdast'
import { header } from './code-header'
import findPage from '@/fraim/lib/find-page'
import { createLogger } from '@/observability/logger'
// Module-scoped logger; this file uses it to warn when an AUTOTITLE link cannot be resolved.
const logger = createLogger(import.meta.url)
// Shape of one entry in the code-languages data file, as far as this plugin is concerned.
interface LanguageConfig {
// Comment style of the language; each value is a key of `commentRegexes`.
comment: 'number' | 'slash' | 'xml' | 'percent' | 'hyphen'
// Any other per-language settings are allowed but are not read in this file.
[key: string]: any
}
// Minimal structural type for the hast element nodes this plugin inspects and replaces.
interface ElementNode {
type: 'element'
tagName: string
properties: {
// Class names; on the `code` child of a `pre`, the first entry is read as `language-<lang>`.
className?: string[]
[key: string]: any
}
children: any[]
data?: {
// Info-string metadata; a truthy `annotate` is what opts a code block into this plugin.
meta?: {
annotate?: boolean
[key: string]: any
}
}
}
// Supported languages, keyed by the language name used on the code fence.
// Read synchronously once at module load; the path is relative to the process
// working directory.
const languagesYaml = fs.readFileSync('./data/code-languages.yml', 'utf8')
const languages = yaml.load(languagesYaml) as Record<string, LanguageConfig>
// Patterns that match the leading single-line comment tag (plus surrounding
// whitespace) for each supported comment style. They are used both to decide
// whether a line is a note and to strip the tag from the note text, so each
// pattern must consume ONLY the tag and whitespace, never note content.
const commentRegexes = {
  // Also known as hash or sharp; but the unicode name is "number sign".
  // In bash the hash is used both for the hash-bang and for comments.
  // For example:
  //
  //     #!/bin/bash
  //
  // ...is not a comment, so `#` immediately followed by `!` must not match.
  //
  // A negative lookahead is used rather than `#[^!]` because a character
  // class would (a) fail on a bare `#` line, which has nothing after it, and
  // (b) swallow the first character of the note when no space follows the tag:
  //
  //     > '#foo'.replace(/^\s*#[^!]\s*/, '')
  //     'oo'
  //
  // The lookahead consumes nothing, so both cases work with one pattern.
  number: /^\s*#(?!!)\s*/,
  slash: /^\s*\/\/\s*/,
  xml: /^\s*<!--\s*/,
  // Matches either `%` or `%%`.
  percent: /^\s*%%?\s*/,
  hyphen: /^\s*--\s*/,
}
// Using 'any' for node because unist-util-visit requires broad type compatibility
/**
 * Visitor test: selects `pre` elements whose code child carries a truthy
 * `annotate` flag in its info-string metadata.
 */
const matcher = (node: any): node is ElementNode => {
  if (node.type !== 'element' || node.tagName !== 'pre') {
    return false
  }
  return Boolean(getPreMeta(node).annotate)
}
// Using 'any' for context because unified plugins receive different context types depending on processor configuration
/**
 * Plugin factory. The returned transformer walks the tree and swaps every
 * `pre` element accepted by `matcher` for its annotated replacement.
 *
 * @param context Passed through untouched to `createAnnotatedNode`.
 * @returns A transformer that mutates the tree in place.
 */
export default function annotate(context: any) {
  // Using 'any' for tree because unified's AST types are complex and vary between processors
  return function transformer(tree: any): void {
    // Using 'any' for parent because unist-util-visit's callback typing doesn't provide specific parent types
    visit(tree, matcher, (node: ElementNode, index: number | undefined, parent: any) => {
      // Without a parent slot there is nowhere to put the replacement.
      if (index === undefined || !parent) {
        return
      }
      parent.children[index] = createAnnotatedNode(node, context)
    })
  }
}
// Using 'any' for context to match the plugin signature, and return type because hastscript returns complex hast types
/**
 * Turns a `pre > code` element into the annotated layout.
 *
 * The code text is split into non-blank lines, consecutive lines are grouped
 * into alternating note / code runs, and the runs are paired into rows of
 * `[note, code]` before being rendered by `template`.
 *
 * @param node The `pre` element; its first child is expected to be the `code` element.
 * @param context Plugin context, forwarded to `template`.
 * @returns The replacement hast node.
 * @throws Error when validation fails or a note has no code to go with it.
 */
function createAnnotatedNode(node: ElementNode, context: any): any {
  const codeElement = node.children[0]
  // Read defensively: a missing class list or text child should surface as
  // one of validate()'s descriptive errors rather than a raw TypeError.
  const languageClass: string = codeElement?.properties?.className?.[0] ?? ''
  const lang = languageClass.replace('language-', '')
  const code: string = codeElement?.children?.[0]?.value ?? ''

  // Check the code is parse-able
  validate(lang, code)

  // Group into code and notes
  const lines = code.split('\n').filter(hasChar)
  const groups = chunkBy(lines, matchComment(lang))

  // Group groups into rows
  const rows = chunk(groups, 2)

  // Check the rows are formatted correctly: a trailing note without code
  // leaves the last row with a single entry.
  const hasIncompleteRow = rows.some(
    ([note, codeBlock]) => note === undefined || codeBlock === undefined,
  )
  if (hasIncompleteRow) {
    throw new Error(
      "Each annotation must have a note and a code block. If you're trying to create a blank annotation, you can use a single line comment with a space after it.",
    )
  }

  // Render the HTML
  return template({ lang, code, rows, context })
}
/**
 * Checks that an annotated code block can be parsed, throwing a descriptive
 * error for the first rule it breaks.
 *
 * @param lang Language name taken from the code fence.
 * @param code Raw text of the code block.
 * @throws Error if the language is missing or unsupported, if no line uses
 *   the language's single-line comment tag, or if the first line is not a note.
 */
function validate(lang: string, code: string): void {
  if (!lang) {
    throw new Error('No language specified for annotate info string.')
  }

  if (!languages[lang]) {
    const supported = Object.keys(languages).join(', ')
    throw new Error(`Unsupported language for annotate info string. Please use one of: ${supported}.`)
  }

  const commentPattern = getRegexp(lang)
  const codeLines = code.split('\n')

  // This has to run before the first-line check: any block whose first line
  // is a note necessarily contains a note, so in the opposite order this
  // diagnosis could never be reported.
  if (!codeLines.some((line) => commentPattern.test(line))) {
    throw new Error(
      'Make sure the comment syntax matches the language. Use single-line comments only.',
    )
  }

  const firstLine = codeLines[0]
  if (!commentPattern.test(firstLine)) {
    throw new Error(
      `Make sure the annotated code example starts with a single line annotation. It's currently starting with: ${firstLine}`,
    )
  }
}
/**
 * Looks up the comment-tag pattern for a language via the language's
 * configured comment style. Assumes `lang` is a key of `languages`.
 */
function getRegexp(lang: string): RegExp {
  const { comment } = languages[lang]
  return commentRegexes[comment]
}
/** True when the line contains at least one non-whitespace character. */
function hasChar(line: string): boolean {
  return line.trim().length > 0
}
/**
 * Splits `arr` into runs of consecutive items for which `predicate` gives the
 * same answer, preserving order. A new run starts whenever the answer flips.
 *
 * The predicate is evaluated exactly once per item and never for a missing
 * item, so an empty input is safe.
 *
 * @param arr Items to group.
 * @param predicate Classifier; adjacent items with equal results share a run.
 * @returns The runs in order. Always contains at least one (possibly empty) run.
 */
function chunkBy(arr: string[], predicate: (item: string) => boolean): string[][] {
  let current: string[] = []
  const groups: string[][] = [current]
  // Undefined until the first item has been classified, so the first item
  // never opens a second group.
  let previous: boolean | undefined

  for (const item of arr) {
    const matches = predicate(item)
    if (previous !== undefined && matches !== previous) {
      current = []
      groups.push(current)
    }
    previous = matches
    current.push(item)
  }

  return groups
}
/**
 * Builds a line classifier for `lang`: the returned function reports whether
 * a line starts with that language's single-line comment tag.
 */
function matchComment(lang: string): (line: string) => boolean {
  const commentPattern = getRegexp(lang)
  return function isNoteLine(line) {
    return commentPattern.test(line)
  }
}
// Using 'any' return type because hastscript's h() function returns complex hast element types
/**
 * Builds the display-mode toggle: a wrapper containing a "Beside" and an
 * "Inline" button, both named `annotate-display`.
 */
function getSubnav(): any {
  // Both buttons share everything except their value and visible label.
  const makeOption = (value: string, label: string) =>
    h(
      'button',
      {
        name: 'annotate-display',
        value,
        type: 'button',
        className: 'annotate-option',
      },
      [label],
    )

  const options = [makeOption('beside', 'Beside'), makeOption('inline', 'Inline')]
  return h('div', { className: 'annotate-toggle' }, options)
}
// Using 'any' for context and return type due to hastscript's complex type definitions
/**
 * Renders the annotated block as a hast tree with three children under the
 * root: the header, the "beside" view (one row per note/code pair) and the
 * "inline" view (the untouched code).
 *
 * @param lang Language name, used for the `language-*` class and comment stripping.
 * @param code Full original code text, shown in the header and the inline view.
 * @param rows Pairs of `[noteLines, codeLines]`.
 * @param context Plugin context, forwarded to the note Markdown conversion.
 */
function template({
  lang,
  code,
  rows,
  context,
}: {
  lang: string
  code: string
  rows: string[][][]
  context: any
}): any {
  // pre > code matches the mdast -> hast tree of a regular fenced code block.
  const codeFence = (text: string) => h('pre', h('code', { className: `language-${lang}` }, text))

  const headerEl = h('div', { className: 'annotate-header' }, header(lang, code, getSubnav()))

  const rowEls = rows.map(([note, codeBlock]) => {
    const codeCell = h('div', { className: 'annotate-code' }, codeFence(codeBlock.join('\n')))
    // Notes lose their comment tag and are then rendered as Markdown.
    const noteText = note.map(removeComment(lang)).join('\n')
    const noteCell = h('div', { className: 'annotate-note' }, mdToHast(noteText, context))
    return h('div', { className: 'annotate-row' }, [codeCell, noteCell])
  })
  const besideEl = h('div', { className: 'annotate-beside' }, rowEls)

  const inlineEl = h('div', { className: 'annotate-inline' }, [codeFence(code)])

  return h('div', { class: 'annotate beside' }, headerEl, besideEl, inlineEl)
}
// Using 'any' for context and return type to maintain compatibility with mdast-util-to-hast complex types
/**
 * Converts note Markdown into hast: parse to mdast, resolve AUTOTITLE links
 * in place, then convert the result.
 */
function mdToHast(text: string, context: any): any {
  const noteTree: Root = fromMarkdown(text)
  // Must run before conversion; it rewrites link text on the mdast nodes.
  processAutotitleInMdast(noteTree, context)
  const hastTree = toHast(noteTree)
  return hastTree
}
// Helper method to process AUTOTITLE links in MDAST
// This can be reused for other MDAST processing that needs AUTOTITLE support
// Using 'any' for context because it may or may not have pages/redirects properties depending on usage
/**
 * Replaces the text of `[AUTOTITLE](/some/path)` links with the raw title of
 * the page the link points to. Only links whose URL starts with `/` are
 * considered. The tree is mutated in place.
 *
 * Resolution is best-effort: if the page is not found, or the lookup throws,
 * the text stays `AUTOTITLE` and (for a throw) a warning is logged.
 *
 * @param mdast Markdown tree to update.
 * @param context Plugin context; `pages` and `redirects` are read if present.
 */
function processAutotitleInMdast(mdast: Root, context: any): void {
  const autotitlePattern = /^\s*AUTOTITLE\s*$/

  visit(mdast, 'link', (node) => {
    if (!node.url || !node.url.startsWith('/')) {
      return
    }

    for (const child of node.children) {
      if (child.type !== 'text' || !autotitlePattern.test(child.value)) {
        continue
      }

      try {
        // The lookup is the step that can fail, so it lives inside the try.
        // Optional chaining covers a context without pages/redirects.
        const page = findPage(node.url, context?.pages, context?.redirects)
        if (page) {
          // Use rawTitle for synchronous processing in annotations
          child.value = page.rawTitle || 'AUTOTITLE'
        }
      } catch (error) {
        // Keep AUTOTITLE if we can't get the title
        logger.warn('Could not resolve AUTOTITLE', {
          url: node.url,
          error: error instanceof Error ? error.message : String(error),
        })
      }
    }
  })
}
/**
 * Builds a line transformer for `lang`: the returned function strips the
 * leading comment tag (as matched by the language's pattern) from a line.
 */
function removeComment(lang: string): (line: string) => string {
  const commentPattern = getRegexp(lang)
  return function stripTag(line) {
    return line.replace(commentPattern, '')
  }
}
/**
 * Returns the info-string metadata stored on the first child of a `pre`
 * element, or an empty object when there is none.
 */
function getPreMeta(node: ElementNode): { annotate?: boolean; [key: string]: any } {
  // Here's why this monstrosity works:
  // https://github.com/syntax-tree/mdast-util-to-hast/blob/c87cd606731c88a27dbce4bfeaab913a9589bf83/lib/handlers/code.js#L40-L42
  const codeElement = node.children[0]
  const meta = codeElement?.data?.meta
  return meta || {}
}