0
0
mirror of https://github.com/naturalcrit/homebrewery.git synced 2025-12-24 22:52:40 +00:00
Files
homebrewery/shared/markdown.js
Trevor Buckner 31c034c029 Rework Marked custom HTML renderer to skip preprocess step
Marked Variables are getting cleared when the custom HTML renderer runs, because Marked.parse re-runs the whole pipeline, including the preprocessor.

Preprocess should only be run once globally during the pipeline, or the original results get overwritten (Marked Variables clears its global array of variables each time it is run)
2025-11-19 23:53:39 -05:00

570 lines
20 KiB
JavaScript

/* eslint-disable max-depth */
/* eslint-disable max-lines */
import _ from 'lodash';
import { marked as Marked } from 'marked';
import MarkedExtendedTables from 'marked-extended-tables';
import MarkedDefinitionLists from 'marked-definition-lists';
import MarkedAlignedParagraphs from 'marked-alignment-paragraphs';
import MarkedNonbreakingSpaces from 'marked-nonbreaking-spaces';
import MarkedSubSuperText from 'marked-subsuper-text';
import { markedVariables,
setMarkedVariablePage,
setMarkedVariable,
getMarkedVariable } from 'marked-variables';
import { markedSmartypantsLite as MarkedSmartypantsLite } from 'marked-smartypants-lite';
import { gfmHeadingId as MarkedGFMHeadingId, resetHeadings as MarkedGFMResetHeadingIDs } from 'marked-gfm-heading-id';
import { markedEmoji as MarkedEmojis } from 'marked-emoji';
//Icon fonts included so they can appear in emoji autosuggest dropdown
import diceFont from '../themes/fonts/iconFonts/diceFont.js';
import elderberryInn from '../themes/fonts/iconFonts/elderberryInn.js';
import gameIcons from '../themes/fonts/iconFonts/gameIcons.js';
import fontAwesome from '../themes/fonts/iconFonts/fontAwesome.js';
const renderer = new Marked.Renderer();
const tokenizer = new Marked.Tokenizer();
//Processes the markdown within an HTML block if it's just a class-wrapper
renderer.html = function (token) {
let html = token.text;
if(_.startsWith(_.trim(html), '<div') && _.endsWith(_.trim(html), '</div>')){
const openTag = html.substring(0, html.indexOf('>')+1);
html = html.substring(html.indexOf('>')+1);
html = html.substring(0, html.lastIndexOf('</div>'));
// Repeat the markdown processing for content inside the div, minus the preprocessing and postprocessing hooks which should only run once globally
const opts = Marked.defaults;
const tokens = Marked.lexer(html, opts);
Marked.walkTokens(tokens, opts.walkTokens);
return `${openTag} ${Marked.parser(tokens, opts)} </div>`;
}
return html;
};
// Don't wrap {{ Spans alone on a line, or {{ Divs in <p> tags
renderer.paragraph = function(token){
let match;
const text = this.parser.parseInline(token.tokens);
if(text.startsWith('<div') || text.startsWith('</div'))
return `${text}`;
else if(match = text.match(/(^|^.*?\n)<span class="inline-block(.*?<\/span>)$/))
return `${match[1].trim() ? `<p>${match[1]}</p>` : ''}<span class="inline-block${match[2]}`;
else
return `<p>${text}</p>\n`;
};
//Fix local links in the Preview iFrame to link inside the frame
renderer.link = function (token) {
let { href, title, tokens } = token;
const text = this.parser.parseInline(tokens);
let self = false;
if(href[0] == '#') {
self = true;
}
href = cleanUrl(href);
if(href === null) {
return text;
}
let out = `<a href="${escape(href)}"`;
if(title) {
out += ` title="${escape(title)}"`;
}
if(self) {
out += ' target="_self"';
}
out += `>${text}</a>`;
return out;
};
// Expose `src` attribute as `--HB_src` to make the URL accessible via CSS
renderer.image = function (token) {
const { href, title, text } = token;
if(href === null)
return text;
let out = `<img src="${href}" alt="${text}" style="--HB_src:url(${href});"`;
if(title)
out += ` title="${title}"`;
out += '>';
return out;
};
// Disable default reflink behavior, as it steps on our variables extension
tokenizer.def = function () {
return undefined;
};
const mustacheSpans = {
name : 'mustacheSpans',
level : 'inline', // Is this a block-level or inline-level tokenizer?
start(src) { return src.match(/{{[^{]/)?.index; }, // Hint to Marked.js to stop and check for a match
tokenizer(src, tokens) {
const completeSpan = /^{{[^\n]*}}/; // Regex for the complete token
const inlineRegex = /{{(?=((?:[:=](?:"['\w,\-+*/()#%=?.&:!@$^;:\[\]_= ]*"|[\w\-+*/()#%.]*)|[^"=':{}\s]*)*))\1 *|}}/g;
const match = completeSpan.exec(src);
if(match) {
//Find closing delimiter
let blockCount = 0;
let tags = {};
let endTags = 0;
let endToken = 0;
let delim;
while (delim = inlineRegex.exec(match[0])) {
if(_.isEmpty(tags)) {
tags = processStyleTags(delim[0].substring(2));
endTags = delim[0].length;
}
if(delim[0].startsWith('{{')) {
blockCount++;
} else if(delim[0] == '}}' && blockCount !== 0) {
blockCount--;
if(blockCount == 0) {
endToken = inlineRegex.lastIndex;
break;
}
}
}
if(endToken) {
const raw = src.slice(0, endToken);
const text = raw.slice(endTags || -2, -2);
return { // Token to generate
type : 'mustacheSpans', // Should match "name" above
raw : raw, // Text to consume from the source
text : text, // Additional custom properties
tags : tags,
tokens : this.lexer.inlineTokens(text) // inlineTokens to process **bold**, *italics*, etc.
};
}
}
},
renderer(token) {
const tags = token.tags;
tags.classes = ['inline-block', tags.classes].join(' ').trim();
return `<span` +
`${tags.classes ? ` class="${tags.classes}"` : ''}` +
`${tags.id ? ` id="${tags.id}"` : ''}` +
`${tags.styles ? ` style="${Object.entries(tags.styles).map(([key, value])=>`${key}:${value};`).join(' ')}"` : ''}` +
`${tags.attributes ? ` ${Object.entries(tags.attributes).map(([key, value])=>`${key}="${value}"`).join(' ')}` : ''}` +
`>${this.parser.parseInline(token.tokens)}</span>`; // parseInline to turn child tokens into HTML
}
};
const mustacheDivs = {
name : 'mustacheDivs',
level : 'block',
start(src) { return src.match(/\n *{{[^{]/m)?.index; }, // Hint to Marked.js to stop and check for a match
tokenizer(src, tokens) {
const completeBlock = /^ *{{[^\n}]* *\n.*\n *}}/s; // Regex for the complete token
const blockRegex = /^ *{{(?=((?:[:=](?:"['\w,\-+*/()#%=?.&:!@$^;:\[\]_= ]*"|[\w\-()#%.]*)|[^"=':{}\s]*)*))\1 *$|^ *}}$/gm;
const match = completeBlock.exec(src);
if(match) {
//Find closing delimiter
let blockCount = 0;
let tags = {};
let endTags = 0;
let endToken = 0;
let delim;
while (delim = blockRegex.exec(match[0])?.[0].trim()) {
if(_.isEmpty(tags)) {
tags = processStyleTags(delim.substring(2));
endTags = delim.length + src.indexOf(delim);
}
if(delim.startsWith('{{')) {
blockCount++;
} else if(delim == '}}' && blockCount !== 0) {
blockCount--;
if(blockCount == 0) {
endToken = blockRegex.lastIndex;
break;
}
}
}
if(endToken) {
const raw = src.slice(0, endToken);
const text = raw.slice(endTags || -2, -2);
return { // Token to generate
type : 'mustacheDivs', // Should match "name" above
raw : raw, // Text to consume from the source
text : text, // Additional custom properties
tags : tags,
tokens : this.lexer.blockTokens(text)
};
}
}
},
renderer(token) {
const tags = token.tags;
tags.classes = ['block', tags.classes].join(' ').trim();
return `<div` +
`${tags.classes ? ` class="${tags.classes}"` : ''}` +
`${tags.id ? ` id="${tags.id}"` : ''}` +
`${tags.styles ? ` style="${Object.entries(tags.styles).map(([key, value])=>`${key}:${value};`).join(' ')}"` : ''}` +
`${tags.attributes ? ` ${Object.entries(tags.attributes).map(([key, value])=>`${key}="${value}"`).join(' ')}` : ''}` +
`>${this.parser.parse(token.tokens)}</div>`; // parse to turn child tokens into HTML
}
};
const mustacheInjectInline = {
name : 'mustacheInjectInline',
level : 'inline',
start(src) { return src.match(/ *{[^{\n]/)?.index; }, // Hint to Marked.js to stop and check for a match
tokenizer(src, tokens) {
const inlineRegex = /^ *{(?=((?:[:=](?:"['\w,\-+*/()#%=?.&:!@$^;:\[\]_= ]*"|[\w\-()#%.]*)|[^"=':{}\s]*)*))\1}/g;
const match = inlineRegex.exec(src);
if(match) {
const lastToken = tokens[tokens.length - 1];
if(!lastToken || lastToken.type == 'mustacheInjectInline')
return false;
const tags = processStyleTags(match[1]);
lastToken.originalType = lastToken.type;
lastToken.type = 'mustacheInjectInline';
lastToken.injectedTags = tags;
return {
type : 'mustacheInjectInline', // Should match "name" above
raw : match[0], // Text to consume from the source
text : ''
};
}
},
renderer(token) {
if(!token.originalType){
return;
}
token.type = token.originalType;
const text = this.parser.parseInline([token]);
const originalTags = extractHTMLStyleTags(text);
const injectedTags = token.injectedTags;
const tags = mergeHTMLTags(originalTags, injectedTags);
const openingTag = /(<[^\s<>]+)[^\n<>]*(>.*)/s.exec(text);
if(openingTag) {
return `${openingTag[1]}` +
`${tags.classes ? ` class="${tags.classes}"` : ''}` +
`${tags.id ? ` id="${tags.id}"` : ''}` +
`${!_.isEmpty(tags.styles) ? ` style="${Object.entries(tags.styles).map(([key, value])=>`${key}:${value};`).join(' ')}"` : ''}` +
`${!_.isEmpty(tags.attributes) ? ` ${Object.entries(tags.attributes).map(([key, value])=>`${key}="${value}"`).join(' ')}` : ''}` +
`${openingTag[2]}`; // parse to turn child tokens into HTML
}
return text;
}
};
const mustacheInjectBlock = {
extensions : [{
name : 'mustacheInjectBlock',
level : 'block',
start(src) { return src.match(/\n *{[^{\n]/m)?.index; }, // Hint to Marked.js to stop and check for a match
tokenizer(src, tokens) {
const inlineRegex = /^ *{(?=((?:[:=](?:"['\w,\-+*/()#%=?.&:!@$^;:\[\]_= ]*"|[\w\-+*/()#%.]*)|[^"=':{}\s]*)*))\1}/ym;
const match = inlineRegex.exec(src);
if(match) {
const lastToken = tokens[tokens.length - 1];
if(!lastToken || lastToken.type == 'mustacheInjectBlock')
return false;
lastToken.originalType = 'mustacheInjectBlock';
lastToken.injectedTags = processStyleTags(match[1]);
return {
type : 'mustacheInjectBlock', // Should match "name" above
raw : match[0], // Text to consume from the source
text : ''
};
}
},
renderer(token) {
if(!token.originalType){
return;
}
token.type = token.originalType;
const text = this.parser.parse([token]);
const originalTags = extractHTMLStyleTags(text);
const injectedTags = token.injectedTags;
const tags = mergeHTMLTags(originalTags, injectedTags);
const openingTag = /(<[^\s<>]+)[^\n<>]*(>.*)/s.exec(text);
if(openingTag) {
return `${openingTag[1]}` +
`${tags.classes ? ` class="${tags.classes}"` : ''}` +
`${tags.id ? ` id="${tags.id}"` : ''}` +
`${!_.isEmpty(tags.styles) ? ` style="${Object.entries(tags.styles).map(([key, value])=>`${key}:${value};`).join(' ')}"` : ''}` +
`${!_.isEmpty(tags.attributes) ? ` ${Object.entries(tags.attributes).map(([key, value])=>`${key}="${value}"`).join(' ')}` : ''}` +
`${openingTag[2]}`; // parse to turn child tokens into HTML
}
return text;
}
}],
walkTokens(token) {
// After token tree is finished, tag tokens to apply styles to so Renderer can find them
// Does not work with tables since Marked.js tables generate invalid "tokens", and changing "type" ruins Marked handling that edge-case
if(token.originalType == 'mustacheInjectBlock' && token.type !== 'table') {
token.originalType = token.type;
token.type = 'mustacheInjectBlock';
}
}
};
const forcedParagraphBreaks = {
name : 'hardBreaks',
level : 'block',
start(src) { return src.match(/\n:+$/m)?.index; }, // Hint to Marked.js to stop and check for a match
tokenizer(src, tokens) {
const regex = /^(:+)(?:\n|$)/ym;
const match = regex.exec(src);
if(match?.length) {
return {
type : 'hardBreaks', // Should match "name" above
raw : match[0], // Text to consume from the source
length : match[1].length,
text : ''
};
}
},
renderer(token) {
return `<div class='blank'></div>\n`.repeat(token.length);
}
};
// Emoji options
// To add more icon fonts, need to do these things
// 1) Add the font file as .woff2 to themes/fonts/iconFonts folder
// 2) Create a .less file mapping CSS class names to the font character
// 3) Create a .js file mapping Autosuggest names to CSS class names
// 4) Import the .less file into shared/naturalcrit/codeEditor/codeEditor.less
// 5) Import the .less file into themes/V3/blank.style.less
// 6) Import the .js file to shared/naturalcrit/codeEditor/autocompleteEmoji.js and add to `emojis` object
// 7) Import the .js file here to markdown.js, and add to `emojis` object below
const MarkedEmojiOptions = {
emojis : {
...diceFont,
...elderberryInn,
...fontAwesome,
...gameIcons,
},
renderer : (token)=>`<i class="${token.emoji}"></i>`
};
const tableTerminators = [
`:+\\n`, // hardBreak
` *{[^\n]+}`, // blockInjector
` *{{[^{\n]*\n.*?\n}}` // mustacheDiv
];
Marked.use(markedVariables());
Marked.use(MarkedDefinitionLists());
Marked.use({ extensions: [forcedParagraphBreaks, mustacheSpans, mustacheDivs, mustacheInjectInline] });
Marked.use(mustacheInjectBlock);
Marked.use(MarkedAlignedParagraphs());
Marked.use(MarkedSubSuperText());
Marked.use(MarkedNonbreakingSpaces());
Marked.use({ renderer: renderer, tokenizer: tokenizer, mangle: false });
Marked.use(MarkedExtendedTables({ interruptPatterns: tableTerminators }), MarkedGFMHeadingId({ globalSlugs: true }),
MarkedSmartypantsLite(), MarkedEmojis(MarkedEmojiOptions));
function cleanUrl(href) {
try {
href = encodeURI(href).replace(/%25/g, '%');
} catch {
return null;
}
return href;
}
const escapeTest = /[&<>"']/;
const escapeReplace = /[&<>"']/g;
const escapeTestNoEncode = /[<>"']|&(?!#?\w+;)/;
const escapeReplaceNoEncode = /[<>"']|&(?!#?\w+;)/g;
const escapeReplacements = {
'&' : '&amp;',
'<' : '&lt;',
'>' : '&gt;',
'"' : '&quot;',
'\'' : '&#39;'
};
const getEscapeReplacement = (ch)=>escapeReplacements[ch];
const escape = function (html, encode) {
if(encode) {
if(escapeTest.test(html)) {
return html.replace(escapeReplace, getEscapeReplacement);
}
} else {
if(escapeTestNoEncode.test(html)) {
return html.replace(escapeReplaceNoEncode, getEscapeReplacement);
}
}
return html;
};
const tagTypes = ['div', 'span', 'a'];
const tagRegex = new RegExp(`(${
_.map(tagTypes, (type)=>{
return `\\<${type}\\b|\\</${type}>`;
}).join('|')})`, 'g');
// Special "void" tags that can be self-closed but don't need to be.
const voidTags = new Set([
'area', 'base', 'br', 'col', 'command', 'hr', 'img',
'input', 'keygen', 'link', 'meta', 'param', 'source'
]);
const processStyleTags = (string)=>{
//split tags up. quotes can only occur right after : or =.
//TODO: can we simplify to just split on commas?
const tags = string.match(/(?:[^, ":=]+|[:=](?:"[^"]*"|))+/g);
const id = _.remove(tags, (tag)=>tag.startsWith('#')).map((tag)=>tag.slice(1))[0] || null;
const classes = _.remove(tags, (tag)=>(!tag.includes(':')) && (!tag.includes('='))).join(' ') || null;
const attributes = _.remove(tags, (tag)=>(tag.includes('='))).map((tag)=>tag.replace(/="?([^"]*)"?/g, '="$1"'))
?.filter((attr)=>!attr.startsWith('class="') && !attr.startsWith('style="') && !attr.startsWith('id="'))
.reduce((obj, attr)=>{
const index = attr.indexOf('=');
let [key, value] = [attr.substring(0, index), attr.substring(index + 1)];
value = value.replace(/"/g, '');
obj[key.trim()] = value.trim();
return obj;
}, {}) || null;
const styles = tags?.length ? tags.reduce((styleObj, style)=>{
const index = style.indexOf(':');
const [key, value] = [style.substring(0, index), style.substring(index + 1)];
styleObj[key.trim()] = value.replace(/"?([^"]*)"?/g, '$1').trim();
return styleObj;
}, {}) : null;
return {
id : id,
classes : classes,
styles : _.isEmpty(styles) ? null : styles,
attributes : _.isEmpty(attributes) ? null : attributes
};
};
//Given a string representing an HTML element, extract all of its properties (id, class, style, and other attributes)
const extractHTMLStyleTags = (htmlString)=>{
const firstElementOnly = htmlString.split('>')[0];
const id = firstElementOnly.match(/id="([^"]*)"/)?.[1] || null;
const classes = firstElementOnly.match(/class="([^"]*)"/)?.[1] || null;
const styles = firstElementOnly.match(/style="([^"]*)"/)?.[1]
?.split(';').reduce((styleObj, style)=>{
if(style.trim() === '') return styleObj;
const index = style.indexOf(':');
const [key, value] = [style.substring(0, index), style.substring(index + 1)];
styleObj[key.trim()] = value.trim();
return styleObj;
}, {}) || null;
const attributes = firstElementOnly.match(/[a-zA-Z]+="[^"]*"/g)
?.filter((attr)=>!attr.startsWith('class="') && !attr.startsWith('style="') && !attr.startsWith('id="'))
.reduce((obj, attr)=>{
const index = attr.indexOf('=');
const [key, value] = [attr.substring(0, index), attr.substring(index + 1)];
obj[key.trim()] = value.replace(/"/g, '');
return obj;
}, {}) || null;
return {
id : id,
classes : classes,
styles : _.isEmpty(styles) ? null : styles,
attributes : _.isEmpty(attributes) ? null : attributes
};
};
const mergeHTMLTags = (originalTags, newTags)=>{
return {
id : newTags.id || originalTags.id || null,
classes : [originalTags.classes, newTags.classes].join(' ').trim() || null,
styles : Object.assign(originalTags.styles ?? {}, newTags.styles ?? {}),
attributes : Object.assign(originalTags.attributes ?? {}, newTags.attributes ?? {})
};
};
const Markdown = {
marked : Marked,
render : (rawBrewText, pageNumber=0)=>{
setMarkedVariablePage(pageNumber);
const lastPageNumber = pageNumber > 0 ? getMarkedVariable('HB_pageNumber', pageNumber - 1) : 0;
setMarkedVariable('HB_pageNumber', //Add document variables for this page
!isNaN(Number(lastPageNumber)) ? Number(lastPageNumber) + 1 : lastPageNumber,
pageNumber);
if(pageNumber==0) MarkedGFMResetHeadingIDs();
rawBrewText = rawBrewText.replace(/^\\column(?:break)?$/gm, `\n<div class='columnSplit'></div>\n`);
const opts = Marked.defaults;
rawBrewText = opts.hooks.preprocess(rawBrewText);
const tokens = Marked.lexer(rawBrewText, opts);
Marked.walkTokens(tokens, opts.walkTokens);
const html = Marked.parser(tokens, opts);
return opts.hooks.postprocess(html);
},
validate : (rawBrewText)=>{
const errors = [];
const leftovers = _.reduce(rawBrewText.split('\n'), (acc, line, _lineNumber)=>{
const lineNumber = _lineNumber + 1;
const matches = line.match(tagRegex);
if(!matches || !matches.length) return acc;
_.each(matches, (match)=>{
_.each(tagTypes, (type)=>{
if(match == `<${type}`){
acc.push({
type : type,
line : lineNumber
});
}
if(match === `</${type}>`){
// Closing tag: Check we expect it to be closed.
// The accumulator may contain a sequence of voidable opening tags,
// over which we skip before checking validity of the close.
while (acc.length && voidTags.has(_.last(acc).type) && _.last(acc).type != type) {
acc.pop();
}
// Now check that what remains in the accumulator is valid.
if(!acc.length){
errors.push({
line : lineNumber,
type : type,
text : 'Unmatched closing tag',
id : 'CLOSE'
});
} else if(_.last(acc).type == type){
acc.pop();
} else {
errors.push({
line : `${_.last(acc).line} to ${lineNumber}`,
type : type,
text : 'Type mismatch on closing tag',
id : 'MISMATCH'
});
acc.pop();
}
}
});
});
return acc;
}, []);
_.each(leftovers, (unmatched)=>{
errors.push({
line : unmatched.line,
type : unmatched.type,
text : 'Unmatched opening tag',
id : 'OPEN'
});
});
return errors;
},
};
export default Markdown;