elk/composables/content-parse.ts

142 lines
4.1 KiB
TypeScript
Raw Normal View History

2022-12-27 20:13:50 +01:00
// @unimport-disable
2022-11-21 15:14:07 +08:00
import type { Emoji } from 'masto'
import type { Node } from 'ultrahtml'
import { TEXT_NODE, parse, render, walkSync } from 'ultrahtml'
import { findAndReplaceEmojisInText } from '@iconify/utils'
import { emojiRegEx, getEmojiAttributes } from '../config/emojis'
2022-12-27 19:37:22 +01:00
// Scratch <textarea> used to let the browser decode entities.
// It is `null` when `process.client` is falsy, i.e. when no DOM is available.
const decoder: HTMLTextAreaElement | null = process.client ? document.createElement('textarea') : null

// Named entities understood by the DOM-less fallback in `decodeHtml`.
const fallbackEntities = new Map<string, string>([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', '\''],
  ['nbsp', '\u00A0'],
])

/**
 * Decode HTML character references in `text`.
 *
 * With a DOM available, the text is assigned to a detached `<textarea>` and its
 * `value` is read back. Without a DOM, a small fallback decodes numeric
 * references (`&#39;`, `&#x27;`) and the named entities listed in
 * `fallbackEntities`; anything it does not recognise is left untouched
 * instead of throwing.
 *
 * @param text - String that may contain HTML character references.
 * @returns The decoded string.
 */
export function decodeHtml(text: string) {
  if (decoder) {
    decoder.innerHTML = text
    return decoder.value
  }

  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (entity: string, body: string) => {
    if (body[0] !== '#')
      return fallbackEntities.get(body) ?? entity

    const isHex = body[1] === 'x' || body[1] === 'X'
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10)
    // String.fromCodePoint throws on out-of-range values, so keep those verbatim
    return codePoint > 0 && codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : entity
  })
}
/**
 * Parse raw HTML from a Mastodon server into an AST,
 * with interop of custom emojis and inline Markdown syntax.
 *
 * @param html - Raw HTML string to process.
 * @param customEmojis - Custom emojis keyed by shortcode (without the colons).
 *   A `:name:` sequence becomes an `<img>` only when `name` is an own key of this record.
 * @param markdown - When true, fenced code blocks (``` or ~~~) and inline
 *   bold / italic / strikethrough / code markers are rewritten to HTML tags.
 * @param forTiptap - When true, unicode emojis are left untouched instead of
 *   being passed through `replaceUnicodeEmoji`.
 * @returns The result of `parse()` applied to the processed HTML.
 */
export function parseMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}, markdown = true, forTiptap = false) {
  // unicode emojis to images, but only if not converting HTML for Tiptap
  let processed = forTiptap ? html : replaceUnicodeEmoji(html)

  // custom emojis
  processed = processed.replace(/:([\w-]+?):/g, (_, name) => {
    // Own-property check so `:constructor:`, `:toString:` etc. are not mistaken
    // for emojis through members inherited from Object.prototype.
    const emoji = Object.prototype.hasOwnProperty.call(customEmojis, name) ? customEmojis[name] : undefined
    if (emoji)
      return `<img src="${emoji.url}" alt=":${name}:" class="custom-emoji" data-emoji-id="${name}" />`
    return `:${name}:`
  })

  if (markdown) {
    // handle code blocks
    processed = processed
      .replace(/>(```|~~~)(\w*)([\s\S]+?)\1/g, (_1, _2, lang, raw) => {
        const code = htmlToText(raw)
        const classes = lang ? ` class="language-${lang}"` : ''
        return `><pre><code${classes}>${code}</code></pre>`
      })

    // Inline Markdown rules, tried in this order for every text node.
    const inlineRules: [RegExp, string][] = [
      [/\*\*\*(.*?)\*\*\*/g, '<b><em>$1</em></b>'],
      [/\*\*(.*?)\*\*/g, '<b>$1</b>'],
      [/\*(.*?)\*/g, '<em>$1</em>'],
      [/~~(.*?)~~/g, '<del>$1</del>'],
      [/`([^`]+?)`/g, '<code>$1</code>'],
    ]

    walkSync(parse(processed), (node) => {
      if (node.type !== TEXT_NODE)
        return

      for (const [re, replacement] of inlineRules) {
        for (const match of node.value.matchAll(re)) {
          const source = match[0]
          const converted = source.replace(re, replacement)

          // Offsets come from the tree parsed before any rewriting, so they are
          // trusted only while they still point at the matched text.
          const start = node.loc ? match.index! + node.loc[0].start : -1
          const end = start + source.length

          if (start >= 0 && processed.slice(start, end) === source) {
            processed = processed.slice(0, start) + converted + processed.slice(end)
          }
          else {
            // Function replacer: `$` sequences in user text must stay literal.
            processed = processed.replace(source, () => converted)
          }
        }
      }
    })
  }

  return parse(processed)
}
/**
 * Converts raw HTML from a Mastodon server to HTML for the Tiptap editor.
 *
 * Delegates to `parseMastodonHTML` with Markdown handling enabled and the
 * Tiptap flag set, then serialises the resulting tree with `render`.
 */
export function convertMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}) {
  return render(parseMastodonHTML(html, customEmojis, true, true))
}
2022-11-26 00:17:15 +08:00
/**
 * Convert an HTML string to text: the HTML is parsed, every top-level node is
 * passed through `treeToText`, and the concatenated result is trimmed.
 */
export function htmlToText(html: string) {
  const topLevel = parse(html).children as Node[]
  const pieces = topLevel.map(node => treeToText(node))
  return pieces.join('').trim()
}
2022-11-30 14:50:47 +08:00
/**
 * Recursively serialise an AST node back to text with Markdown-style markers.
 *
 * - text nodes are entity-decoded via `decodeHtml`
 * - `<br>` becomes a newline; `<p>` / `<pre>` are prefixed with a newline
 * - `<code>` is wrapped in backticks, or in a fenced block (with the language
 *   taken from its `class`) when its parent is `<pre>`
 * - `<b>`/`<strong>`, `<i>`/`<em>` and `<del>` are wrapped in `**`, `*` and `~~`
 * - emoji `<img>` elements collapse to `:shortcode:` (custom) or their `alt` (iconify)
 */
export function treeToText(input: Node): string {
  if (input.type === TEXT_NODE)
    return decodeHtml(input.value)

  const tag = input.name
  if (tag === 'br')
    return '\n'

  let prefix = ''
  let suffix = ''

  switch (tag) {
    case 'p':
    case 'pre':
      prefix = '\n'
      break
    case 'code':
      if (input.parent?.name === 'pre') {
        const lang = input.attributes.class?.replace('language-', '')
        prefix = `\`\`\`${lang || ''}\n`
        suffix = '\n```'
      }
      else {
        prefix = suffix = '`'
      }
      break
    case 'b':
    case 'strong':
      prefix = suffix = '**'
      break
    case 'i':
    case 'em':
      prefix = suffix = '*'
      break
    case 'del':
      prefix = suffix = '~~'
      break
  }

  const inner = 'children' in input
    ? (input.children as Node[]).map(child => treeToText(child)).join('')
    : ''

  if (tag === 'img') {
    const cls = input.attributes.class
    if (cls?.includes('custom-emoji'))
      return `:${input.attributes['data-emoji-id']}:`
    if (cls?.includes('iconify-emoji'))
      return input.attributes.alt
  }

  return prefix + inner + suffix
}
/**
 * Replace unicode emojis in `html` with `<img>` tags whose `src`, `alt` and
 * `class` come from `getEmojiAttributes`. When the replacer yields a falsy
 * result, the input is returned unchanged.
 */
export function replaceUnicodeEmoji(html: string) {
  const replaced = findAndReplaceEmojisInText(emojiRegEx, html, (match) => {
    const { src, alt, class: className } = getEmojiAttributes(match)
    return `<img src="${src}" alt="${alt}" class="${className}" />`
  })
  return replaced || html
}