phanpy/src/utils/enhance-content.js

321 lines
9.2 KiB
JavaScript
Raw Normal View History

2022-12-10 12:14:48 +03:00
import emojifyText from './emojify-text';
2023-10-14 20:19:21 +03:00
import mem from './mem';
2022-12-10 12:14:48 +03:00
// Detached scratch element: HTML snippets are assigned to its `innerHTML` so the
// parsed child nodes can be moved into the content being enhanced.
const fauxDiv = document.createElement('div');
// The only class names that are kept on links; every other class is removed.
const whitelistLinkClasses = ['u-url', 'mention', 'hashtag'];
2023-10-14 20:19:21 +03:00
/**
 * Escapes `&`, `<` and `>` so a text node's value can be re-parsed as HTML
 * without its characters being interpreted as markup.
 *
 * @param {string} text - Raw text (e.g. a text node's `nodeValue`).
 * @returns {string} The text with `&`, `<` and `>` replaced by entities.
 */
function escapeHTML(text) {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/**
 * Replaces each text node with the nodes obtained by parsing
 * `transform(escapedText)` as HTML.
 *
 * @param {Text[]} textNodes - Text nodes to rewrite in place.
 * @param {(html: string) => string} transform - Receives the escaped text and
 *   returns the HTML that should take the node's place.
 */
function rewriteTextNodes(textNodes, transform) {
  textNodes.forEach((node) => {
    fauxDiv.innerHTML = transform(escapeHTML(node.nodeValue));
    // Spreading snapshots the live childNodes list before the nodes are moved.
    node.replaceWith(...fauxDiv.childNodes);
  });
}

/**
 * Sets a link's content to `prefix` followed by `<span>label</span>`.
 * Built with DOM APIs (not `innerHTML`) because `label` comes from decoded
 * link text and must never be parsed as markup.
 *
 * @param {Element} link - The anchor whose content is replaced.
 * @param {string} prefix - Leading character kept outside the span (`@` or `#`).
 * @param {string} label - Text placed inside the span.
 */
function setPrefixedSpanLabel(link, prefix, label) {
  const span = document.createElement('span');
  span.textContent = label;
  link.textContent = prefix;
  link.append(span);
}

/**
 * Normalises links: forces `target="_blank"`, strips non-whitelisted classes,
 * flags links whose text is a URL, and marks/spanifies mentions and hashtags.
 *
 * @param {Element} dom - Root element holding the parsed content.
 */
function enhanceLinks(dom) {
  // Add target="_blank" to all links with no target="_blank"
  // E.g. `note` in `account`
  dom.querySelectorAll('a:not([target="_blank"])').forEach((link) => {
    link.setAttribute('target', '_blank');
  });

  // Remove all classes except `u-url`, `mention`, `hashtag`.
  // classList is live, so iterate over a snapshot; removing while iterating
  // the live list would skip the token following each removed one.
  dom.querySelectorAll('a[class]').forEach((link) => {
    [...link.classList].forEach((className) => {
      if (!whitelistLinkClasses.includes(className)) {
        link.classList.remove(className);
      }
    });
  });

  // Add 'has-url-text' to all links that contains a url
  dom.querySelectorAll('a[href]').forEach((link) => {
    if (/^https?:\/\//i.test(link.textContent.trim())) {
      link.classList.add('has-url-text');
    }
  });

  // Spanify un-spanned mentions and hashtags
  const seenUsernames = [];
  dom.querySelectorAll('a[href]').forEach((link) => {
    const text = link.innerText.trim();
    const hasChildren = link.querySelector('*') !== null;

    // If text looks like @username@domain, then it's a mention
    if (/^@[^@]+(@[^@]+)?$/.test(text)) {
      const [, username, domain] = text.split('@');
      if (!hasChildren) {
        // Show only @username for the first occurrence of a username (or a
        // repeat of the very same account); otherwise keep the domain so
        // same-named accounts on different servers stay distinguishable.
        const isFirstOrSameAccount =
          !seenUsernames.some(([u]) => u === username) ||
          seenUsernames.some(([u, d]) => u === username && d === domain);
        if (isFirstOrSameAccount) {
          setPrefixedSpanLabel(link, '@', username);
          seenUsernames.push([username, domain]);
        } else {
          // `domain` is undefined for a bare "@username"; never render it.
          setPrefixedSpanLabel(
            link,
            '@',
            domain ? `${username}@${domain}` : username,
          );
        }
      }
      link.classList.add('mention');
    }

    // If text looks like #hashtag, then it's a hashtag
    if (/^#[^#]+$/.test(text)) {
      if (!hasChildren) setPrefixedSpanLabel(link, '#', text.slice(1));
      link.classList.add('mention', 'hashtag');
    }
  });
}

/**
 * Converts paragraphs fenced with ``` into `<pre><code>` blocks, both when the
 * fence opens and closes in one paragraph and when it spans sibling paragraphs.
 *
 * @param {Element} dom - Root element holding the parsed content.
 */
function convertCodeBlocks(dom) {
  // Convert ```code``` to <pre><code>code</code></pre>
  [...dom.querySelectorAll('p')]
    .filter((p) => /^```[^]+```$/.test(p.innerText.trim()))
    .forEach((block) => {
      const pre = document.createElement('pre');
      // Replace <br /> with newlines
      block.querySelectorAll('br').forEach((br) => br.replaceWith('\n'));
      pre.innerHTML = `<code>${block.innerHTML.trim()}</code>`;
      block.replaceWith(pre);
    });

  // Convert multi-paragraph code blocks to <pre><code>code</code></pre>
  [...dom.querySelectorAll('p')]
    // Paragraphs that open a fence
    .filter((p) => /^```/.test(p.innerText))
    .forEach((opening) => {
      // Collect following sibling paragraphs up to the one that ends with ```
      const fencedParagraphs = [opening];
      let isClosed = false;
      let sibling = opening.nextElementSibling;
      while (sibling && sibling.tagName === 'P') {
        fencedParagraphs.push(sibling);
        if (/```$/.test(sibling.innerText)) {
          isClosed = true;
          break;
        }
        sibling = sibling.nextElementSibling;
      }
      // No closing fence among the adjacent paragraphs: leave them untouched.
      if (!isClosed) return;

      fencedParagraphs.forEach((p) => {
        // Replace <br /> with newlines
        p.querySelectorAll('br').forEach((br) => br.replaceWith('\n'));
      });
      const codeText = fencedParagraphs.map((p) => p.innerHTML).join('\n\n');
      const pre = document.createElement('pre');
      pre.innerHTML = `<code tabindex="0">${codeText}</code>`;
      opening.replaceWith(pre);
      fencedParagraphs.forEach((p) => p.remove());
    });
}

/**
 * Counts the hashtag links of a paragraph that contains nothing but hashtag
 * links, `<br>`s and whitespace.
 *
 * @param {Element} p - Paragraph to inspect.
 * @returns {number|null} Number of hashtag links, or `null` when the paragraph
 *   has any other content.
 */
function countHashtagOnlyLinks(p) {
  let hashtagCount = 0;
  for (const node of p.childNodes) {
    if (node.nodeType === Node.TEXT_NODE) {
      if (node.textContent.trim() !== '') return null;
    } else if (node.tagName === 'BR') {
      // Ignore <br />
    } else if (node.tagName === 'A') {
      if (!node.textContent.trim().startsWith('#')) return null;
      hashtagCount++;
    } else {
      return null;
    }
  }
  return hashtagCount;
}

/**
 * Adds the `hashtag-stuffing` class (and a `title` with the full text) to
 * paragraphs that consist only of hashtags.
 *
 * @param {Element} dom - Root element holding the parsed content.
 */
function markHashtagStuffing(dom) {
  // Index of the most recent stuffed paragraph; null until one is found.
  let prevIndex = null;
  dom.querySelectorAll('p').forEach((p, index) => {
    const hashtagCount = countHashtagOnlyLinks(p);
    if (hashtagCount === null) return;
    // Only consider "stuffing" if:
    // - there are more than 3 hashtags
    // - there are more than 1 hashtag in adjacent paragraphs
    // Compare against null explicitly: index 0 is a valid previous index.
    const followsStuffedParagraph =
      prevIndex !== null && index === prevIndex + 1;
    if (hashtagCount > 3 || (hashtagCount > 1 && followsStuffedParagraph)) {
      prevIndex = index;
      p.classList.add('hashtag-stuffing');
      p.title = p.innerText;
    }
  });
}

/**
 * Parses an HTML string and rewrites it for display: normalises links,
 * spanifies mentions/hashtags, renders custom emojis, converts fenced and
 * inline code, links `@user@twitter.com` handles and flags hashtag stuffing.
 *
 * @param {string} content - HTML markup to enhance.
 * @param {object} [opts]
 * @param {*} [opts.emojis] - Custom emoji data, handed unchanged to
 *   `emojifyText`; when falsy no emoji replacement happens.
 * @param {(dom: Element) => void} [opts.postEnhanceDOM] - Hook called with the
 *   working DOM. It is deferred to a microtask, i.e. it runs after the HTML
 *   string has been serialised, so mutations it makes are not reflected in
 *   the returned string.
 * @returns {string} The enhanced HTML.
 */
function _enhanceContent(content, opts = {}) {
  const { emojis, postEnhanceDOM = () => {} } = opts;

  const dom = document.createElement('div');
  dom.innerHTML = content;

  // Cheap string checks decide which (more expensive) DOM passes are needed.
  const hasLink = /<a/i.test(content);
  const hasCodeBlock = content.includes('```');

  if (hasLink) {
    enhanceLinks(dom);
  }

  // EMOJIS
  // ======
  // Convert :shortcode: to <img />
  if (emojis && content.includes(':')) {
    rewriteTextNodes(extractTextNodes(dom), (html) =>
      emojifyText(html, emojis),
    );
  }

  // CODE BLOCKS
  // ===========
  if (hasCodeBlock) {
    convertCodeBlocks(dom);
  }

  // INLINE CODE
  // ===========
  // Convert `code` to <code>code</code>
  if (content.includes('`')) {
    rewriteTextNodes(extractTextNodes(dom), (html) =>
      /`[^`]+`/.test(html)
        ? html.replaceAll(/(`[^]+?`)/g, '<code>$1</code>')
        : html,
    );
  }

  // TWITTER USERNAMES
  // =================
  // Convert @username@twitter.com to <a href="https://twitter.com/username">@username@twitter.com</a>
  if (/twitter\.com/i.test(content)) {
    // Text directly inside links is skipped so existing anchors are not nested.
    const textNodes = extractTextNodes(dom, {
      rejectFilter: ['A'],
    });
    rewriteTextNodes(textNodes, (html) =>
      /@[a-zA-Z0-9_]+@twitter\.com/.test(html)
        ? html.replaceAll(
            /(@([a-zA-Z0-9_]+)@twitter\.com)/g,
            '<a href="https://twitter.com/$2" rel="nofollow noopener noreferrer" target="_blank">$1</a>',
          )
        : html,
    );
  }

  // HASHTAG STUFFING
  // ================
  // Get the <p> that contains a lot of hashtags, add a class to it
  if (content.includes('#')) {
    markHashtagStuffing(dom);
  }

  if (postEnhanceDOM) {
    queueMicrotask(() => postEnhanceDOM(dom));
  }

  return dom.innerHTML;
}
2023-10-14 20:19:21 +03:00
// Public entry point: `_enhanceContent` wrapped by the `mem` helper imported
// from './mem' (presumably memoization — the caching contract lives in that module).
const enhanceContent = mem(_enhanceContent);
// Upper-case node names of elements whose direct text content is never
// extracted, listed by category.
const defaultRejectFilter = [
  'STYLE', // document metadata
  'IMG', // image and multimedia
  'VIDEO',
  'AUDIO',
  'AREA',
  'MAP',
  'TRACK',
  'EMBED', // embedded content
  'IFRAME',
  'OBJECT',
  'PICTURE',
  'PORTAL',
  'SOURCE',
  'SVG', // SVG and MathML
  'MATH',
  'CANVAS', // scripting
  'NOSCRIPT',
  'SCRIPT',
  'INPUT', // forms
  'OPTION',
  'TEXTAREA',
  'SLOT', // web components
  'TEMPLATE',
];
// Lookup table (node name -> true) derived from the list above, so a
// membership check is a single property read.
const defaultRejectFilterMap = {};
for (const rejectedName of defaultRejectFilter) {
  defaultRejectFilterMap[rejectedName] = true;
}
/**
 * Collects the text nodes under `dom`, skipping those whose direct parent
 * element is on the reject list.
 *
 * Only the immediate parent is checked: text nested deeper inside a rejected
 * element (e.g. inside a `<span>` within it) is still returned.
 *
 * @param {Node} dom - Root of the subtree to walk.
 * @param {object} [opts]
 * @param {string[]} [opts.rejectFilter] - Extra parent node names to reject,
 *   in addition to the defaults in `defaultRejectFilterMap`.
 * @returns {Text[]} Accepted text nodes in document order.
 */
function extractTextNodes(dom, opts = {}) {
  const rejectFilterMap = { ...defaultRejectFilterMap };
  for (const nodeName of opts.rejectFilter ?? []) {
    rejectFilterMap[String(nodeName).toUpperCase()] = true;
  }

  const walker = document.createTreeWalker(dom, NodeFilter.SHOW_TEXT, {
    acceptNode(node) {
      // Elements outside the HTML namespace (SVG, MathML) report their
      // nodeName in original case, so normalise before the lookup.
      const parentName = node.parentNode.nodeName.toUpperCase();
      return rejectFilterMap[parentName]
        ? NodeFilter.FILTER_REJECT
        : NodeFilter.FILTER_ACCEPT;
    },
  });

  const textNodes = [];
  for (let node = walker.nextNode(); node; node = walker.nextNode()) {
    textNodes.push(node);
  }
  return textNodes;
}
2022-12-16 08:27:04 +03:00
export default enhanceContent;